Пример #1
0
  /**
   * Distribute the tunecache from node 0 to all other nodes.
   *
   * Rank 0 serializes its tune cache and broadcasts first the payload
   * length and then the payload itself.  Every other rank receives the
   * payload and hands it to deserializeTuneCache().  Every rank takes part
   * in the same sequence of comm_broadcast() calls, so all ranks must call
   * this function together.
   *
   * When MULTI_GPU is not defined this function does nothing.
   */
  static void broadcastTuneCache()
  {
#ifdef MULTI_GPU

    std::stringstream serialized;
    std::string payload; // owned buffer: released automatically, even if deserialization throws
    size_t size = 0;     // defined on every rank before the broadcast overwrites it

    const bool is_root = (comm_rank() == 0);

    if (is_root) {
      serializeTuneCache(serialized);
      payload = serialized.str(); // take a single copy of the serialized cache
      size = payload.length();
    }
    comm_broadcast(&size, sizeof(size_t));

    if (size > 0) {
      // Receivers size their buffer to match what the root is about to send.
      if (!is_root) payload.resize(size);

      // size > 0 guarantees payload is non-empty, so &payload[0] is a valid writable pointer.
      comm_broadcast(&payload[0], size);

      if (!is_root) {
        // Passing the std::string keeps the full payload length rather than
        // stopping at the first NUL byte as a C-string would.
        serialized.str(payload);
        deserializeTuneCache(serialized);
      }
    }
#endif
  }
/**
 * Allocate a distributed region described by a block layout.
 *
 * The call is bracketed by mgas_barrier() on entry and exit.  The requested
 * size must equal the product of all block_size[] and n_blocks[] entries
 * over the n_dims dimensions; this is enforced with MGAS_CHECK.  Process 0
 * obtains the address range from gmt_alloc_dist(), the resulting pointer is
 * passed through comm_broadcast(), and every caller then registers the
 * layout for that range with gmt_validate_dist().
 *
 * @param size        total size of the region
 * @param n_dims      number of entries in block_size[] and n_blocks[]
 * @param block_size  per-dimension block extent
 * @param n_blocks    per-dimension block count
 * @return the pointer produced on process 0 and broadcast to the callers
 */
mgasptr_t mgas_all_dmalloc(size_t size, size_t n_dims,
                           const size_t block_size[],
                           const size_t n_blocks[])
{
    mgas_barrier();

    // The layout must account for exactly the requested size.
    size_t whole_size = 1;
    size_t dim = 0;
    while (dim < n_dims) {
        whole_size *= block_size[dim] * n_blocks[dim];
        dim++;
    }
    MGAS_CHECK(size == whole_size);

    mgas_proc_t self = globals_get_pid();
    gmt_t *gmt = globals_get_gmt();

    // Only process 0 reserves the address range; the others start from
    // MGAS_NULL and receive the value through the broadcast below.
    mgasptr_t mp = MGAS_NULL;
    if (self == 0) {
        mp = gmt_alloc_dist(gmt, size);

        MGAS_CHECK(mp != MGAS_NULL);
        MGAS_ASSERT(mp >= MGASPTR_MIN);
    }

    comm_broadcast(&mp, sizeof(mgasptr_t), 0);

    // Describe the block layout and attach it to the allocated range.
    dist_t layout;
    dist_init(&layout, n_dims, block_size, n_blocks);

    gmt_validate_dist(gmt, mp, size, &layout);

    mgas_barrier();
    return mp;
}