コード例 #1
0
ファイル: comm_util.c プロジェクト: arm-hpc/miniAMR
// Verify that the send/receive buffers and the MPI request arrays can hold
// the current communication lists.  Any that is too small is freed and
// reallocated at twice the required size; the old contents are discarded.
void check_buff_size(void)
{
   int dir, partner;
   int need_send = 0, need_recv = 0, need_req = 0;

   // Find the largest requirement over the three directions.
   for (dir = 0; dir < 3; dir++) {
      if (!nonblocking) {
         // largest single message to any one partner
         for (partner = 0; partner < num_comm_partners[dir]; partner++)
            if (need_send < send_size[dir][partner])
               need_send = send_size[dir][partner];
      } else if (need_send < s_buf_num[dir]) {
         // nonblocking: the direction's total send count
         need_send = s_buf_num[dir];
      }

      if (need_req < num_comm_partners[dir])
         need_req = num_comm_partners[dir];

      if (need_recv < r_buf_num[dir])
         need_recv = r_buf_num[dir];
   }

   // Send buffer.
   if (need_send > s_buf_size) {
      s_buf_size = (int) (2.0*((double) need_send));
      free(send_buff);
      send_buff = (double *) ma_malloc(s_buf_size*sizeof(double),
                                       __FILE__, __LINE__);
   }

   // Receive buffer.
   if (need_recv > r_buf_size) {
      r_buf_size = (int) (2.0*((double) need_recv));
      free(recv_buff);
      recv_buff = (double *) ma_malloc(r_buf_size*sizeof(double),
                                       __FILE__, __LINE__);
   }

   // Request arrays: nothing more to do when they are already big enough.
   if (need_req <= max_num_req)
      return;

   free(request);
   max_num_req = (int) (2.0*((double) need_req));
   request = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request),
                                       __FILE__, __LINE__);
   if (nonblocking) {
      // the send-request array is resized together with request
      free(s_req);
      s_req = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request),
                                        __FILE__, __LINE__);
   }
}
コード例 #2
0
/*
  Prepare OpenSSL for multi-threaded use.

  On the first call (LOCK_crypto still NULL) an array of CRYPTO_num_locks()
  mutexes is allocated and each mutex is initialised.  The thread-id
  callback (through the API that matches the OpenSSL version) and the
  locking callback are registered on every call.

  Returns 0 on success, 1 if the mutex array could not be allocated.
*/
static int ssl_thread_init()
{
  const int lock_count= CRYPTO_num_locks();

  if (!LOCK_crypto)
  {
    int idx= 0;

    LOCK_crypto= (pthread_mutex_t *)ma_malloc(sizeof(pthread_mutex_t) *
                                              lock_count, MYF(0));
    if (!LOCK_crypto)
      return 1;

    while (idx < lock_count)
    {
      pthread_mutex_init(LOCK_crypto + idx, NULL);
      idx++;
    }
  }

#if (OPENSSL_VERSION_NUMBER >= 0x10000000)
  CRYPTO_THREADID_set_callback(my_cb_threadid);
#else
  CRYPTO_set_id_callback(my_cb_threadid);
#endif
  CRYPTO_set_locking_callback(my_cb_locking);

  return 0;
}
コード例 #3
0
ファイル: comm_util.c プロジェクト: arm-hpc/miniAMR
// Routines to add and delete entries from the communication list that is
// used to exchange values for ghost cells.

// Replace *arr with a freshly allocated array of new_cap ints.  Entries
// [0, gap) keep their slot; entries [gap, count) move up one slot and have
// bump added to their value.  Slot gap of the new array is left unset for
// the caller to fill.  The old array is freed.  line is handed to ma_malloc
// in place of __LINE__ so that the caller's line number is what it sees.
static void comm_list_regrow(int **arr, int count, int gap, int bump,
                             int new_cap, int line)
{
   int j, *tmp;

   tmp = (int *) ma_malloc(new_cap*sizeof(int), __FILE__, line);
   for (j = 0; j < gap; j++)
      tmp[j] = (*arr)[j];
   for (j = gap; j < count; j++)
      tmp[j+1] = (*arr)[j] + bump;
   free(*arr);
   *arr = tmp;
}

// Copy communication-list entry j-1 of direction dir into slot j, advancing
// its send/recv buffer offsets by the lengths of the message being inserted
// in front of it.
static void comm_list_shift_entry(int dir, int j, int s_len, int r_len)
{
   comm_block[dir][j] = comm_block[dir][j-1];
   comm_face_case[dir][j] = comm_face_case[dir][j-1];
   comm_pos[dir][j] = comm_pos[dir][j-1];
   comm_pos1[dir][j] = comm_pos1[dir][j-1];
   comm_send_off[dir][j] = comm_send_off[dir][j-1] + s_len;
   comm_recv_off[dir][j] = comm_recv_off[dir][j-1] + r_len;
}

// Add one face exchange to the communication list of direction dir.
//
//   dir      direction index used to select the per-direction lists
//   block_f  value recorded in comm_block for the new entry
//   pe       partner rank the face is exchanged with
//   fcase    face case; values >= 10 carry the +- direction and are reduced
//            by 10 before the message length is selected
//   pos,pos1 sort keys: a partner's entries are kept ordered by (pos, pos1)
//
// The partner list of a direction is kept in ascending pe order and each
// partner owns a contiguous run of entries, comm_index[dir][p] ..
// comm_index[dir][p] + comm_num[dir][p] - 1.  All arrays are grown (to
// twice the needed size) when full.
void add_comm_list(int dir, int block_f, int pe, int fcase, int pos, int pos1)
{
   int i, j, n_part, first, last, keep, s_len, r_len;

   /* set indexes for send and receive to determine length of message:
    * for example, if we send a whole face to a quarter face, we will
    * receive a message sent from a quarter face to a whole face and
    * use 2 as index for the send and 3 for the recv.
    * We can use same index except for offset */
   if (fcase >= 10)    /* +- direction encoded in fcase */
      i = fcase - 10;
   else
      i = fcase;
   switch (i) {
      case 0: s_len = r_len = comm_vars*msg_len[dir][0];
              break;
      case 1: s_len = r_len = comm_vars*msg_len[dir][1];
              break;
      case 2:
      case 3:
      case 4:
      case 5: s_len = comm_vars*msg_len[dir][2];
              r_len = comm_vars*msg_len[dir][3];
              break;
      case 6:
      case 7:
      case 8:
      case 9: s_len = comm_vars*msg_len[dir][3];
              r_len = comm_vars*msg_len[dir][2];
              break;
      default:
              /* An fcase outside 0..9 / 10..19 previously left both
               * lengths uninitialised; give them a defined value so the
               * bookkeeping below never reads indeterminate data. */
              s_len = r_len = 0;
              break;
   }

   /* find the first partner whose rank is not below pe */
   n_part = num_comm_partners[dir];
   for (i = 0; i < n_part; i++)
      if (comm_partner[dir][i] >= pe)
         break;

   /* i is being used below as an index where information about this
    * block should go */
   if (i < n_part && comm_partner[dir][i] == pe) {
      /* existing partner: one more entry for it, so the runs of all later
       * partners start one slot higher */
      send_size[dir][i] += s_len;
      recv_size[dir][i] += r_len;
      for (j = n_part-1; j > i; j--)
         comm_index[dir][j]++;
      comm_num[dir][i]++;
   } else {
      /* new partner: open slot i in every per-partner array, moving
       * partners i and above up one */
      if (n_part == max_comm_part[dir]) {
         /* arrays are full: reallocate each one with a gap at slot i */
         max_comm_part[dir] = (int)(2.0*((double) (n_part+1)));
         comm_list_regrow(&comm_partner[dir], n_part, i, 0,
                          max_comm_part[dir], __LINE__);
         comm_list_regrow(&send_size[dir], n_part, i, 0,
                          max_comm_part[dir], __LINE__);
         comm_list_regrow(&recv_size[dir], n_part, i, 0,
                          max_comm_part[dir], __LINE__);
         /* comm_index is different: the new slot i takes over the start
          * index of the partner it displaces, and the displaced partners
          * start one entry later.  The old value is only read when slot i
          * existed; when appending (i == n_part) the old array has no
          * element i and the start index is computed below instead. */
         keep = (i < n_part) ? comm_index[dir][i] : 0;
         comm_list_regrow(&comm_index[dir], n_part, i, 1,
                          max_comm_part[dir], __LINE__);
         comm_index[dir][i] = keep;
         comm_list_regrow(&comm_num[dir], n_part, i, 0,
                          max_comm_part[dir], __LINE__);
      } else {
         /* room left: shift in place; comm_index[dir][i] is left as is
          * and so keeps the displaced partner's start index */
         for (j = n_part; j > i; j--) {
            comm_partner[dir][j] = comm_partner[dir][j-1];
            send_size[dir][j] = send_size[dir][j-1];
            recv_size[dir][j] = recv_size[dir][j-1];
            comm_index[dir][j] = comm_index[dir][j-1] + 1;
            comm_num[dir][j] = comm_num[dir][j-1];
         }
      }

      /* appended after the last partner: its run starts where the
       * previous partner's run ends (or at 0 for the very first one) */
      if (i == n_part) {
         if (i == 0)
            comm_index[dir][i] = 0;
         else
            comm_index[dir][i] = comm_index[dir][i-1] + comm_num[dir][i-1];
      }
      num_comm_partners[dir]++;
      comm_partner[dir][i] = pe;
      send_size[dir][i] = s_len;
      recv_size[dir][i] = r_len;
      comm_num[dir][i] = 1;  // still have to put info into arrays
   }

   /* make sure the per-entry arrays have room for one more entry */
   if ((num_cases[dir]+1) > max_num_cases[dir]) {
      max_num_cases[dir] = (int)(2.0*((double) (num_cases[dir]+1)));
      comm_list_regrow(&comm_block[dir], num_cases[dir], num_cases[dir], 0,
                       max_num_cases[dir], __LINE__);
      comm_list_regrow(&comm_face_case[dir], num_cases[dir], num_cases[dir],
                       0, max_num_cases[dir], __LINE__);
      comm_list_regrow(&comm_pos[dir], num_cases[dir], num_cases[dir], 0,
                       max_num_cases[dir], __LINE__);
      comm_list_regrow(&comm_pos1[dir], num_cases[dir], num_cases[dir], 0,
                       max_num_cases[dir], __LINE__);
      comm_list_regrow(&comm_send_off[dir], num_cases[dir], num_cases[dir],
                       0, max_num_cases[dir], __LINE__);
      comm_list_regrow(&comm_recv_off[dir], num_cases[dir], num_cases[dir],
                       0, max_num_cases[dir], __LINE__);
   }

   /* partner i's run of entries, already counting the new one */
   first = comm_index[dir][i];
   last = first + comm_num[dir][i] - 1;

   if (first == num_cases[dir]) {
      // at end
      comm_block[dir][num_cases[dir]] = block_f;
      comm_face_case[dir][num_cases[dir]] = fcase;
      comm_pos[dir][num_cases[dir]] = pos;
      comm_pos1[dir][num_cases[dir]] = pos1;
      comm_send_off[dir][num_cases[dir]] = s_buf_num[dir];
      comm_recv_off[dir][num_cases[dir]] = r_buf_num[dir];
   } else {
      /* entries of later partners move up one slot */
      for (j = num_cases[dir]; j > last; j--)
         comm_list_shift_entry(dir, j, s_len, r_len);
      /* walk down partner i's run, moving entries up until the slot
       * that keeps the run ordered by (pos, pos1) is found */
      for (j = last; j >= first; j--)
         if (j == first || comm_pos[dir][j-1] < pos ||
             (comm_pos[dir][j-1] == pos && comm_pos1[dir][j-1] < pos1)) {
            comm_block[dir][j] = block_f;
            comm_face_case[dir][j] = fcase;
            comm_pos[dir][j] = pos;
            comm_pos1[dir][j] = pos1;
            if (j == num_cases[dir]) {
               comm_send_off[dir][j] = s_buf_num[dir];
               comm_recv_off[dir][j] = r_buf_num[dir];
            }
            // else comm_[send,recv]_off[j] values are correct
            break;
         } else {
            comm_list_shift_entry(dir, j, s_len, r_len);
         }
   }
   num_cases[dir]++;
   s_buf_num[dir] += s_len;
   r_buf_num[dir] += r_len;
}
コード例 #4
0
ファイル: init.c プロジェクト: arm-hpc/miniAMR
// Initialize the problem and setup initial blocks.
//
// In order, this routine:
//   - sets the tolerance, the power-of-2 / power-of-8 tables and the
//     per-level block bookkeeping,
//   - fills msg_len[dir][0..3] according to the value of code,
//   - places every rank in the npx x npy x npz processor grid while
//     splitting MPI_COMM_WORLD into a chain of communicators,
//   - allocates the request arrays and the bin/gbin work arrays,
//   - creates this rank's level-0 blocks (random interior values, neighbor
//     information, communication-list entries),
//   - sizes the communication buffers and records the initial checksums.
void init(void)
{
   int n, var, i, j, k, l, m, o, size, dir, i1, i2, j1, j2, k1, k2, ib, jb, kb;
   // NOTE(review): start, pos and pos1 are automatic arrays dimensioned by
   // num_pes / npx / npy / npz; if those are run-time values these are
   // stack VLAs whose size grows with the job size -- confirm.
   int start[num_pes], pos[3][num_pes], pos1[npx][npy][npz], set,
       num, npx1, npy1, npz1, pes, fact, fac[25], nfac, f;
   block *bp;

   /* tolerance is 10^(-error_tol) */
   tol = pow(10.0, ((double) -error_tol));

   /* p2[i] = 2^i and p8[i] = 8^i for i = 0 .. num_refine+1;
    * sorted_index is zeroed over the same range */
   p2[0] = p8[0] = 1;
   for (i = 0; i < (num_refine+1); i++) {
      p8[i+1] = p8[i]*8;
      p2[i+1] = p2[i]*2;
      sorted_index[i] = 0;
   }
   sorted_index[num_refine+1] = 0;
   /* block_start[i] is the running sum of num, which starts at
    * num_pes*global_max_b and is multiplied by 8 for each further level.
    * The block counters are zeroed for levels 1 .. num_refine only;
    * level 0 is not touched in this loop. */
   block_start[0] = 0;
   local_max_b = global_max_b =  init_block_x*init_block_y*init_block_z;
   num = num_pes*global_max_b;
   for (i = 1; i <= num_refine; i++) {
      block_start[i] = block_start[i-1] + num;
      num *= 8;
      num_blocks[i] = 0;
      local_num_blocks[i] = 0;
   }

   /* reset the communication lists; they are filled in below through
    * add_comm_list() */
   zero_comm_list();

   x_block_half = x_block_size/2;
   y_block_half = y_block_size/2;
   z_block_half = z_block_size/2;

   /* msg_len[dir][idx]: message length for direction dir (0 = X, 1 = Y,
    * 2 = Z) and message kind idx, chosen by the value of code */
   if (!code) {
      /* for E/W (X dir) messages:
         0: whole -> whole (7), 1: whole -> whole (27),
         2: whole -> quarter, 3: quarter -> whole */
      msg_len[0][0] = msg_len[0][1] = y_block_size*z_block_size;
      msg_len[0][2] = msg_len[0][3] = y_block_half*z_block_half;
      /* for N/S (Y dir) messages */
      msg_len[1][0] = x_block_size*z_block_size;
      msg_len[1][1] = (x_block_size+2)*z_block_size;
      msg_len[1][2] = msg_len[1][3] = x_block_half*z_block_half;
      /* for U/D (Z dir) messages */
      msg_len[2][0] = x_block_size*y_block_size;
      msg_len[2][1] = (x_block_size+2)*(y_block_size+2);
      msg_len[2][2] = msg_len[2][3] = x_block_half*y_block_half;
   } else if (code == 1) {
      /* code == 1: whole faces include a one-cell border in both face
       * dimensions; quarter faces use half+1 (index 2) and half+2
       * (index 3) */
      /* for E/W (X dir) messages */
      msg_len[0][0] = msg_len[0][1] = (y_block_size+2)*(z_block_size+2);
      msg_len[0][2] = (y_block_half+1)*(z_block_half+1);
      msg_len[0][3] = (y_block_half+2)*(z_block_half+2);
      /* for N/S (Y dir) messages */
      msg_len[1][0] = msg_len[1][1] = (x_block_size+2)*(z_block_size+2);
      msg_len[1][2] = (x_block_half+1)*(z_block_half+1);
      msg_len[1][3] = (x_block_half+2)*(z_block_half+2);
      /* for U/D (Z dir) messages */
      msg_len[2][0] = msg_len[2][1] = (x_block_size+2)*(y_block_size+2);
      msg_len[2][2] = (x_block_half+1)*(y_block_half+1);
      msg_len[2][3] = (x_block_half+2)*(y_block_half+2);
   } else {
      /* any other code: as for code == 1, except that index 3 has the
       * same length as a whole face */
      /* for E/W (X dir) messages */
      msg_len[0][0] = msg_len[0][1] = (y_block_size+2)*(z_block_size+2);
      msg_len[0][2] = (y_block_half+1)*(z_block_half+1);
      msg_len[0][3] = (y_block_size+2)*(z_block_size+2);
      /* for N/S (Y dir) messages */
      msg_len[1][0] = msg_len[1][1] = (x_block_size+2)*(z_block_size+2);
      msg_len[1][2] = (x_block_half+1)*(z_block_half+1);
      msg_len[1][3] = (x_block_size+2)*(z_block_size+2);
      /* for U/D (Z dir) messages */
      msg_len[2][0] = msg_len[2][1] = (x_block_size+2)*(y_block_size+2);
      msg_len[2][2] = (x_block_half+1)*(y_block_half+1);
      msg_len[2][3] = (x_block_size+2)*(y_block_size+2);
   }

   /* Determine position of each core in initial mesh */
   npx1 = npx;
   npy1 = npy;
   npz1 = npz;
   for (i = 0; i < 3; i++)
      for (j = 0; j < num_pes; j++)
         pos[i][j] = 0;
   /* NOTE(review): factor() presumably stores the factors of num_pes in
    * fac and returns how many there are; fac has room for 25 -- confirm
    * against factor(). */
   nfac = factor(num_pes, fac);
   /* request arrays start out with one slot per rank */
   max_num_req = num_pes;
   request = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request),
                                       __FILE__, __LINE__);
   if (nonblocking)
      s_req = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request),
                                        __FILE__, __LINE__);
   pes = 1;
   start[0] = 0;
   num = num_pes;
   /* comms/me/np hold one entry per split level plus entry 0, which
    * describes MPI_COMM_WORLD */
   comms = (MPI_Comm *) ma_malloc((nfac+1)*sizeof(MPI_Comm),
                                  __FILE__, __LINE__);
   me = (int *) ma_malloc((nfac+1)*sizeof(int), __FILE__, __LINE__);
   np = (int *) ma_malloc((nfac+1)*sizeof(int), __FILE__, __LINE__);
   comms[0] = MPI_COMM_WORLD;
   me[0] = my_pe;
   np[0] = num_pes;
   // Walk the factors from the last to the first.  Each factor divides the
   // remaining processor count in the direction chosen by find_dir(),
   // splits the current communicator into groups of num ranks, and refines
   // every rank's coordinate in that direction.
   for (n = 0, i = nfac; i > 0; i--, n++) {
      fact = fac[i-1];
      dir = find_dir(fact, npx1, npy1, npz1);
      if (dir == 0)
         npx1 /= fact;
      else
         if (dir == 1)
            npy1 /= fact;
         else
            npz1 /= fact;
      num /= fact;
      /* color = which group of num consecutive ranks this rank is in;
       * key = current rank, so the ordering inside a group is kept */
      set = me[n]/num;
      MPI_Comm_split(comms[n], set, me[n], &comms[n+1]);
      MPI_Comm_rank(comms[n+1], &me[n+1]);
      MPI_Comm_size(comms[n+1], &np[n+1]);
      /* Each of the pes existing groups becomes fact groups of num ranks.
       * j runs downward so start[j] is still the old value when read. */
      for (j = pes-1; j >= 0; j--)
         for (k = 0; k < fact; k++) {
            m = j*fact + k;
            if (!k)
               start[m] = start[j];
            else
               start[m] = start[m-1] + num;
            /* the num ranks beginning at start[m] get sub-coordinate k */
            for (l = start[m], o = 0; o < num; l++, o++)
               pos[dir][l] = pos[dir][l]*fact + k;
         }
      pes *= fact;
   }
   /* inverse map: processor-grid coordinates -> rank */
   for (i = 0; i < num_pes; i++)
      pos1[pos[0][i]][pos[1][i]][pos[2][i]] = i;

   max_active_block = init_block_x*init_block_y*init_block_z;
   num_active = max_active_block;
   global_active = num_active*num_pes;
   num_parents = max_active_parent = 0;
   size = p2[num_refine+1];  /* block size is p2[num_refine+1-level]
                              * smallest block is size p2[1], so can find
                              * its center */
   /* mesh extent in each direction; max_mesh_size ends up as the largest
    * of the three extents and num_pes+1 */
   mesh_size[0] = npx*init_block_x*size;
   max_mesh_size = mesh_size[0];
   mesh_size[1] = npy*init_block_y*size;
   if (mesh_size[1] > max_mesh_size)
      max_mesh_size = mesh_size[1];
   mesh_size[2] = npz*init_block_z*size;
   if (mesh_size[2] > max_mesh_size)
      max_mesh_size = mesh_size[2];
   if ((num_pes+1) > max_mesh_size)
      max_mesh_size = num_pes + 1;
   bin  = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
   gbin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
   /* f is added to the face case handed to add_comm_list():
    * 0 for the 7 point stencil, 1 otherwise */
   if (stencil == 7)
      f = 0;
   else
      f = 1;
   /* Visit every initial block of the whole mesh, x fastest:
    *   i, j, k     processor coordinates
    *   i2, j2, k2  block coordinates inside one processor's sub-mesh
    *   i1, j1, k1  block coordinates in the whole mesh
    *   n           global block number (advances for every block)
    *   o           local block index (advances only for blocks of my_pe)
    */
   for (o = n = k1 = k = 0; k < npz; k++)
      for (k2 = 0; k2 < init_block_z; k1++, k2++)
         for (j1 = j = 0; j < npy; j++)
            for (j2 = 0; j2 < init_block_y; j1++, j2++)
               for (i1 = i = 0; i < npx; i++)
                  for (i2 = 0; i2 < init_block_x; i1++, i2++, n++) {
                     m = pos1[i][j][k];
                     if (m == my_pe) {
                        /* this block is ours: level 0, no parent,
                         * center in the middle of its cell */
                        bp = &blocks[o];
                        bp->level = 0;
                        bp->number = n;
                        bp->parent = -1;
                        bp->cen[0] = i1*size + size/2;
                        bp->cen[1] = j1*size + size/2;
                        bp->cen[2] = k1*size + size/2;
                        add_sorted_list(o, n, 0);
                        /* interior cells (indices 1 .. block size) get
                         * rand()/RAND_MAX, i.e. values in [0, 1] */
                        for (var = 0; var < num_vars; var++)
                           for (ib = 1; ib <= x_block_size; ib++)
                              for (jb = 1; jb <= y_block_size; jb++)
                                 for (kb = 1; kb <= z_block_size; kb++)
                                    bp->array[var][ib][jb][kb] =
                                       ((double) rand())/((double) RAND_MAX);
                        /* Six faces follow, in the order -x, +x, -y, +y,
                         * -z, +z.  For each face:
                         *   mesh boundary       nei_level = -2, nei = 0
                         *   neighbor off core   nei_level = 0,
                         *                       nei = -1 - neighbor rank,
                         *                       plus a comm list entry
                         *   neighbor on core    nei_level = 0,
                         *                       nei = local block index
                         * In each test the first else pairs with the
                         * inner if, the second with the outer if. */
                        /* face 0: -x */
                        if (i2 == 0)
                           if (i == 0) { /* 0 boundary */
                              bp->nei_level[0] = -2;
                              bp->nei[0][0][0] = 0;
                           } else {      /* boundary with neighbor core */
                              bp->nei_level[0] = 0;
                              bp->nei[0][0][0] = -1 - pos1[i-1][j][k];
                              /* pos is built from the two in-face center
                               * coordinates, pos1 is the coordinate of the
                               * face itself */
                              add_comm_list(0, o, pos1[i-1][j][k], 0+f,
                                            bp->cen[2]*mesh_size[1]+bp->cen[1],
                                            bp->cen[0] - size/2);
                           }
                        else {          /* neighbor on core */
                           bp->nei_level[0] = 0;
                           bp->nei[0][0][0] = o - 1;
                        }
                        bp->nei_refine[0] = 0;
                        /* face 1: +x */
                        if (i2 == (init_block_x - 1))
                           if (i == (npx - 1)) { /* 1 boundary */
                              bp->nei_level[1] = -2;
                              bp->nei[1][0][0] = 0;
                           } else {      /* boundary with neighbor core */
                              bp->nei_level[1] = 0;
                              bp->nei[1][0][0] = -1 - pos1[i+1][j][k];
                              add_comm_list(0, o, pos1[i+1][j][k], 10+f,
                                            bp->cen[2]*mesh_size[1]+bp->cen[1],
                                            bp->cen[0] + size/2);
                           }
                        else {          /* neighbor on core */
                           bp->nei_level[1] = 0;
                           bp->nei[1][0][0] = o + 1;
                        }
                        bp->nei_refine[1] = 0;
                        /* face 2: -y; local blocks one row apart differ
                         * by init_block_x */
                        if (j2 == 0)
                           if (j == 0) { /* 0 boundary */
                              bp->nei_level[2] = -2;
                              bp->nei[2][0][0] = 0;
                           } else {      /* boundary with neighbor core */
                              bp->nei_level[2] = 0;
                              bp->nei[2][0][0] = -1 - pos1[i][j-1][k];
                              add_comm_list(1, o, pos1[i][j-1][k], 0+f,
                                            bp->cen[2]*mesh_size[0]+bp->cen[0],
                                            bp->cen[1] - size/2);
                           }
                        else {          /* neighbor on core */
                           bp->nei_level[2] = 0;
                           bp->nei[2][0][0] = o - init_block_x;
                        }
                        bp->nei_refine[2] = 0;
                        /* face 3: +y */
                        if (j2 == (init_block_y - 1))
                           if (j == (npy - 1)) { /* 1 boundary */
                              bp->nei_level[3] = -2;
                              bp->nei[3][0][0] = 0;
                           } else {      /* boundary with neighbor core */
                              bp->nei_level[3] = 0;
                              bp->nei[3][0][0] = -1 - pos1[i][j+1][k];
                              add_comm_list(1, o, pos1[i][j+1][k], 10+f,
                                            bp->cen[2]*mesh_size[0]+bp->cen[0],
                                            bp->cen[1] + size/2);
                           }
                        else {          /* neighbor on core */
                           bp->nei_level[3] = 0;
                           bp->nei[3][0][0] = o + init_block_x;
                        }
                        bp->nei_refine[3] = 0;
                        /* face 4: -z; local blocks one plane apart differ
                         * by init_block_x*init_block_y */
                        if (k2 == 0)
                           if (k == 0) { /* 0 boundary */
                              bp->nei_level[4] = -2;
                              bp->nei[4][0][0] = 0;
                           } else {      /* boundary with neighbor core */
                              bp->nei_level[4] = 0;
                              bp->nei[4][0][0] = -1 - pos1[i][j][k-1];
                              add_comm_list(2, o, pos1[i][j][k-1], 0+f,
                                            bp->cen[1]*mesh_size[0]+bp->cen[0],
                                            bp->cen[2] - size/2);
                           }
                        else {          /* neighbor on core */
                           bp->nei_level[4] = 0;
                           bp->nei[4][0][0] = o - init_block_x*init_block_y;
                        }
                        bp->nei_refine[4] = 0;
                        /* face 5: +z */
                        if (k2 == (init_block_z - 1))
                           if (k == (npz - 1)) { /* 1 boundary */
                              bp->nei_level[5] = -2;
                              bp->nei[5][0][0] = 0;
                           } else {      /* boundary with neighbor core */
                              bp->nei_level[5] = 0;
                              bp->nei[5][0][0] = -1 - pos1[i][j][k+1];
                              add_comm_list(2, o, pos1[i][j][k+1], 10+f,
                                            bp->cen[1]*mesh_size[0]+bp->cen[0],
                                            bp->cen[2] + size/2);
                           }
                        else {          /* neighbor on core */
                           bp->nei_level[5] = 0;
                           bp->nei[5][0][0] = o + init_block_x*init_block_y;
                        }
                        bp->nei_refine[5] = 0;
                        o++;
                     }
                  }

   /* the communication lists are complete: size the buffers for them */
   check_buff_size();

   /* record the initial checksum of every variable */
   for (var = 0; var < num_vars; var++)
      grid_sum[var] = check_sum(var);
}