Example No. 1
#include <stdlib.h>

int
main(int argc, char **argv) {

  // allocate 4 times as many elements as we need (sparse-ish data)
  element_t * elements = (element_t *)malloc(NUM_ELEMENTS * 4 * sizeof(element_t));  

  // allocate two arrays of pointers to elements
  unsigned num_elements = NUM_ELEMENTS;
  element_t **list = (element_t **)malloc(num_elements * sizeof(element_t *));  
  element_t **list2 = (element_t **)malloc(num_elements * sizeof(element_t *));  

  // point list entries at random elements
  for (int i = 0 ; i < num_elements ; i ++) {
	 int index = random() % (NUM_ELEMENTS * 4);
	 list[i] = &elements[index];
	 initialize_element(list[i]);
  }
  
  vp_t *vp = get_a_vp();  

  // iteratively process the elements
  for (int step = 0 ; step < NUM_STEPS ; step ++) {

	 // pick two random elements
	 element_t *e_cull = list[random() % num_elements];
	 element_t *e_update = list[random() % num_elements];

	 // select those elements close to the element of interest
	 unsigned num_elements2 = 0;
	 for (int i = 0 ; i < num_elements ; i ++) {
		if (!cull(list[i], e_cull)) {
		  list2[num_elements2] = list[i];
		  num_elements2 ++;
		}
	 }

	 for (int i = 0 ; i < num_elements2 ; i ++) {
		transform(list2[i], vp);
	 }

	 // update based on the update element
	 for (int i = 0 ; i < num_elements2 ; i ++) {
		update(list2[i], e_update);
	 }
  }
  free(list2);
  free(list);
  free(elements);

  return 0;
}
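The listing above omits its supporting declarations. A minimal sketch of what they might look like, purely for illustration (element_t, vp_t, the constants, and the helper functions are assumptions, not the original definitions):

#define NUM_ELEMENTS 1024
#define NUM_STEPS    100

typedef struct { float pos[3]; float value; } element_t;  /* hypothetical */
typedef struct { float view[16]; } vp_t;                  /* hypothetical */

void  initialize_element(element_t *e);      /* set up one element */
vp_t *get_a_vp(void);                        /* obtain a view parameter */
int   cull(element_t *e, element_t *ref);    /* nonzero if e is culled */
void  transform(element_t *e, vp_t *vp);     /* transform surviving elements */
void  update(element_t *e, element_t *ref);  /* update e against a reference */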
Example No. 2
void migrate_pre_process(void *data, int num_gid_entries, int num_lid_entries, 
                         int num_import, 
                         ZOLTAN_ID_PTR import_global_ids,
                         ZOLTAN_ID_PTR import_local_ids, int *import_procs,
                         int *import_to_part,
                         int num_export, ZOLTAN_ID_PTR export_global_ids,
                         ZOLTAN_ID_PTR export_local_ids, int *export_procs,
                         int *export_to_part,
                         int *ierr)
{
int lid = num_lid_entries-1;
int gid = num_gid_entries-1;
char msg[256];

  *ierr = ZOLTAN_OK;

  if (data == NULL) {
    *ierr = ZOLTAN_FATAL;
    return;
  }
  MESH_INFO_PTR mesh = (MESH_INFO_PTR) data;
  ELEM_INFO_PTR elements = mesh->elements;

  /*
   *  Set some flags. Assume if true for one element, true for all elements.
   *  Note that some procs may have no elements. 
   */

  int k = 0;

  if (elements[0].edge_wgt != NULL)
    k = 1;

  /* Make sure all procs have the same value */

  MPI_Allreduce(&k, &Use_Edge_Wgts, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
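  /* (MPI_MAX over the local 0/1 flags yields 1 everywhere if any rank has
     edge weights.) */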

  /*
   *  For all elements, update adjacent elements' processor information.
   *  That way, when we perform migration, we will be migrating updated
   *  adjacency information.
   */
  
  int proc = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &proc);

  /*
   *  Build New_Elem_Index array and list of processor assignments.
   */
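  /*
   * New_Elem_Index[i] is the global ID that will occupy local slot i after
   * migration; ZOLTAN_ID_INVALID marks slots freed by exports, which the
   * import loop below fills on a first-free basis.
   */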

  New_Elem_Index_Size = mesh->num_elems + num_import - num_export;
  if (mesh->elem_array_len > New_Elem_Index_Size) 
    New_Elem_Index_Size = mesh->elem_array_len;

  New_Elem_Index = new ZOLTAN_ID_TYPE [New_Elem_Index_Size];

  int *proc_ids = NULL;
  char *change = NULL;

  if (mesh->num_elems > 0) {

    proc_ids = new int [mesh->num_elems];
    change   = new char [mesh->num_elems];

    if (New_Elem_Index == NULL || proc_ids == NULL || change == NULL) {
      Gen_Error(0, "fatal: insufficient memory");
      *ierr = ZOLTAN_MEMERR;
      if (proc_ids) delete [] proc_ids;
      if (change) delete [] change;
      if (New_Elem_Index)
        {
        delete [] New_Elem_Index;
        New_Elem_Index = NULL;
        }
      return;
    }

    for (int i = 0; i < mesh->num_elems; i++) {
      New_Elem_Index[i] = elements[i].globalID;
      proc_ids[i] = proc;
      change[i] = 0;
    }
  }

  for (int i = mesh->num_elems; i < New_Elem_Index_Size; i++) {
    New_Elem_Index[i] = ZOLTAN_ID_INVALID;
  }

  for (int i = 0; i < num_export; i++) {

    int exp_elem = 0;

    if (num_lid_entries)
      exp_elem = export_local_ids[lid+i*num_lid_entries];
    else  /* testing num_lid_entries == 0 */
      search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries], 
                          &exp_elem);

    if (export_procs[i] != proc) {
      /* Export is moving to a new processor */
      New_Elem_Index[exp_elem] = ZOLTAN_ID_INVALID;
      proc_ids[exp_elem] = export_procs[i];
    }
  }

  for (int i = 0; i < num_import; i++) {
    if (import_procs[i] != proc) {
      /* Import is arriving from a different processor, not just moving to a new partition */
      /* search for first free location */
      int j=0;
      for (j = 0; j < New_Elem_Index_Size; j++) 
        if (New_Elem_Index[j] == ZOLTAN_ID_INVALID) break;

      New_Elem_Index[j] = import_global_ids[gid+i*num_gid_entries];
    }
  }

  /* 
   * Update local information 
   */

  /* Set change flag for elements whose adjacent elements are being exported */

  for (int i = 0; i < num_export; i++) {

    int exp_elem = 0;

    if (num_lid_entries)
      exp_elem = export_local_ids[lid+i*num_lid_entries];
    else  /* testing num_lid_entries == 0 */
      search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries], 
                          &exp_elem);

    elements[exp_elem].my_part = export_to_part[i];

    if (export_procs[i] == proc) 
      continue;  /* No adjacency changes needed if export is changing
                    only partition, not processor. */

    for (int j = 0; j < elements[exp_elem].adj_len; j++) {

      /* Skip NULL adjacencies (sides that are not adjacent to another elem). */
      if (elements[exp_elem].adj[j] == ZOLTAN_ID_INVALID) continue;

      /* Set change flag for adjacent local elements. */
      if (elements[exp_elem].adj_proc[j] == proc) {
        change[elements[exp_elem].adj[j]] = 1;
      }
    }
  }

  /* Change adjacency information in marked elements */
  for (int i = 0; i < mesh->num_elems; i++) {
    if (change[i] == 0) continue;

    /* loop over marked element's adjacencies; look for ones that are moving */
    for (int j = 0; j < elements[i].adj_len; j++) {

      /* Skip NULL adjacencies (sides that are not adjacent to another elem). */
      if (elements[i].adj[j] == ZOLTAN_ID_INVALID) continue;

      if (elements[i].adj_proc[j] == proc) {
        /* adjacent element is local; check whether it is moving. */
        int new_proc = proc_ids[elements[i].adj[j]];
        if (new_proc != proc) {
          /* Adjacent element is being exported; update this adjacency entry */
          elements[i].adj[j] = elements[elements[i].adj[j]].globalID;
          elements[i].adj_proc[j] = new_proc;
        }
      }
    }
  }
  delete [] change;
  change = NULL;  /* guard: the error path below would otherwise delete it again */

  /*
   * Update off-processor information 
   */

  int maxlen = 0;
  int *send_vec = NULL;

  for (int i = 0; i < mesh->necmap; i++) 
    maxlen += mesh->ecmap_cnt[i];

  if (maxlen > 0) {
    send_vec = new int [maxlen];
    if (send_vec == NULL) {
      Gen_Error(0, "fatal: insufficient memory");
      *ierr = ZOLTAN_MEMERR;
      delete [] proc_ids;
      delete [] change;
      return;
    }

    /* Load send vector */

    for (int i = 0; i < maxlen; i++)
      send_vec[i] = proc_ids[mesh->ecmap_elemids[i]];
  }

  delete [] proc_ids;

  int *recv_vec = NULL;

  if (maxlen > 0)
    recv_vec = new int [maxlen];

  /*  Perform boundary exchange */

  boundary_exchange(mesh, 1, send_vec, recv_vec);
  
  /* Unload receive vector */

  int offset = 0;
  for (int i = 0; i < mesh->necmap; i++) {
    for (int j = 0; j < mesh->ecmap_cnt[i]; j++, offset++) {
      if (recv_vec[offset] == mesh->ecmap_id[i]) {
        /* off-processor element is not changing processors.  */
        /* no changes are needed in the local data structure. */
        continue;
      }
      /* Change processor assignment in local element's adjacency list */
      int bor_elem = mesh->ecmap_elemids[offset];
      for (k = 0; k < elements[bor_elem].adj_len; k++) {

        /* Skip NULL adjacencies (sides that are not adj to another elem). */
        if (elements[bor_elem].adj[k] == ZOLTAN_ID_INVALID) continue;

        if (elements[bor_elem].adj[k] == mesh->ecmap_neighids[offset] &&
            elements[bor_elem].adj_proc[k] == mesh->ecmap_id[i]) {
          elements[bor_elem].adj_proc[k] = recv_vec[offset];
          if (recv_vec[offset] == proc) {
            /* element is moving to this processor; */
            /* convert adj from global to local ID. */
            int idx = in_list(mesh->ecmap_neighids[offset],New_Elem_Index_Size,
                              New_Elem_Index);
            if (idx == -1) {
              sprintf(msg, "fatal: unable to locate element " ZOLTAN_ID_SPEC " in "
                           "New_Elem_Index", mesh->ecmap_neighids[offset]);
              Gen_Error(0, msg);
              *ierr = ZOLTAN_FATAL;
              if (send_vec) delete [] send_vec;
              if (recv_vec) delete [] recv_vec;
              return;
            }
            elements[bor_elem].adj[k] = idx;
          }
          break;  /* from k loop */
        }
      }
    }
  }

  if (recv_vec) delete [] recv_vec;
  if (send_vec) delete [] send_vec;

  /*
   * Allocate space (if needed) for the new element data.
   */

  if (mesh->elem_array_len < New_Elem_Index_Size) {
    mesh->elem_array_len = New_Elem_Index_Size;

    // We don't use C++ new/delete here, because this was malloc'd
    // in some C code.

    mesh->elements = (ELEM_INFO_PTR) realloc (mesh->elements,
                                     mesh->elem_array_len * sizeof(ELEM_INFO));
    if (mesh->elements == NULL) {
      Gen_Error(0, "fatal: insufficient memory");
      return;
    }

    /* initialize the new spots */
    for (int i = mesh->num_elems; i < mesh->elem_array_len; i++)
      initialize_element(&(mesh->elements[i]));
  }
}
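In the Zoltan test driver this routine runs as a pre-migration callback. A registration sketch, assuming a configured struct Zoltan_Struct *zz and the same mesh pointer passed as callback data (the surrounding Zoltan setup is not shown here):

/* Register, so Zoltan_Migrate invokes it before moving element data. */
Zoltan_Set_Fn(zz, ZOLTAN_PRE_MIGRATE_PP_FN_TYPE,
              (void (*)()) migrate_pre_process, (void *) mesh);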
Example No. 3
int read_exoII_file(int Proc,
                    int Num_Proc,
                    PROB_INFO_PTR prob,
                    PARIO_INFO_PTR pio_info,
                    MESH_INFO_PTR mesh)
{
#ifndef ZOLTAN_NEMESIS
  Gen_Error(0, "Fatal:  Nemesis requested but not linked with driver.");
  return 0;

#else /* ZOLTAN_NEMESIS */
  /* Local declarations. */
  char  *yo = "read_exoII_file";
  char   par_nem_fname[FILENAME_MAX+1], title[MAX_LINE_LENGTH+1];
  char   cmesg[256];

  float  ver;

  int    i, pexoid, cpu_ws = 0, io_ws = 0;
  int   *nnodes = NULL, *etypes = NULL;
#ifdef DEBUG_EXO
  int    j, k, elem;
#endif
  FILE  *fdtmp;

/***************************** BEGIN EXECUTION ******************************/

  DEBUG_TRACE_START(Proc, yo);

  /* since this is a test driver, set error reporting in exodus */
  ex_opts(EX_VERBOSE | EX_DEBUG);

  /* generate the parallel filename for this processor */
  gen_par_filename(pio_info->pexo_fname, par_nem_fname, pio_info, Proc,
                   Num_Proc);

  /* 
   * check whether the parallel file exists.  do the check with fopen, 
   * as ex_open core dumps on the Paragon when files do not exist.
   */

  if ((fdtmp = fopen(par_nem_fname, "r")) == NULL) {
    sprintf(cmesg,"fatal: parallel Exodus II file %s does not exist",
            par_nem_fname);
    Gen_Error(0, cmesg);
    return 0;
  }
  else
    fclose(fdtmp);

  /*
   * now open the existing parallel file using Exodus calls.
   */

  if ((pexoid = ex_open(par_nem_fname, EX_READ, &cpu_ws, &io_ws,
                        &ver)) < 0) {
    sprintf(cmesg,"fatal: could not open parallel Exodus II file %s",
            par_nem_fname);
    Gen_Error(0, cmesg);
    return 0;
  }

  /* and get initial information */
  if (ex_get_init(pexoid, title, &(mesh->num_dims),
                  &(mesh->num_nodes), &(mesh->num_elems),
                  &(mesh->num_el_blks), &(mesh->num_node_sets),
                  &(mesh->num_side_sets)) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_get_init");
    return 0;
  }


  /* allocate some memory for the element blocks */
  mesh->data_type = MESH;
  mesh->vwgt_dim = 1;  /* One weight for now. */
  mesh->ewgt_dim = 1;  /* One weight for now. */
  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
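  /* The single allocation is partitioned into five length-num_el_blks arrays. */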
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->hindex = (int *) malloc(sizeof(int));
  mesh->hindex[0] = 0;

  if (ex_get_elem_blk_ids(pexoid, mesh->eb_ids) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_get_elem_blk_ids");
    return 0;
  }

  /* allocate temporary storage for items needing global reduction.   */
  /* nemesis does not store most element block info about blocks for  */
  /* which the processor owns no elements.                            */
  /* we, however, use this information in migration, so we need to    */
  /* accumulate it for all element blocks.    kdd 2/2001              */

  if (mesh->num_el_blks > 0) {
    nnodes = (int *) malloc(2 * mesh->num_el_blks * sizeof(int));
    if (!nnodes) {
      Gen_Error(0, "fatal: insufficient memory");
      return 0;
    }
    etypes = nnodes + mesh->num_el_blks;
  }

  /* get the element block information */
  for (i = 0; i < mesh->num_el_blks; i++) {

    /* allocate space for name */
    mesh->eb_names[i] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
    if (!mesh->eb_names[i]) {
      Gen_Error(0, "fatal: insufficient memory");
      return 0;
    }

    if (ex_get_elem_block(pexoid, mesh->eb_ids[i], mesh->eb_names[i],
                          &(mesh->eb_cnts[i]), &(nnodes[i]),
                          &(mesh->eb_nattrs[i])) < 0) {
      Gen_Error(0, "fatal: Error returned from ex_get_elem_block");
      return 0;
    }

    if (mesh->eb_cnts[i] > 0) {
      if ((etypes[i] =  (int) get_elem_type(mesh->eb_names[i],
                                            nnodes[i],
                                            mesh->num_dims)) == E_TYPE_ERROR) {
        Gen_Error(0, "fatal: could not get element type");
        return 0;
      }
    }
    else etypes[i] = (int) NULL_EL;
  }

  /* Perform reduction on necessary fields of element blocks.  kdd 2/2001 */
  MPI_Allreduce(nnodes, mesh->eb_nnodes, mesh->num_el_blks, MPI_INT, MPI_MAX, 
                MPI_COMM_WORLD);
  MPI_Allreduce(etypes, mesh->eb_etypes, mesh->num_el_blks, MPI_INT, MPI_MIN, 
                MPI_COMM_WORLD);
  for (i = 0; i < mesh->num_el_blks; i++) {
    strcpy(mesh->eb_names[i], get_elem_name(mesh->eb_etypes[i]));
  }
  free(nnodes);

  /*
   * allocate memory for the elements
   * allocate a little extra for element migration later
   */
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len 
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * initialize all of the element structs as unused by
   * setting the globalID to -1
   */
  for (i = 0; i < mesh->elem_array_len; i++) 
    initialize_element(&(mesh->elements[i]));

  /* read the information for the individual elements */
  if (!read_elem_info(pexoid, Proc, prob, mesh)) {
    Gen_Error(0, "fatal: Error returned from read_elem_info");
    return 0;
  }

  /* read the communication information */
  if (!read_comm_map_info(pexoid, Proc, prob, mesh)) {
    Gen_Error(0, "fatal: Error returned from read_comm_map_info");
    return 0;
  }

  /* Close the parallel file */
  if(ex_close (pexoid) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_close");
    return 0;
  }

  /* print out the distributed mesh */
  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;

#endif /* ZOLTAN_NEMESIS */
}
Example No. 4
int chaco_setup_mesh_struct(
  int        Proc,
  int        Num_Proc,
  PROB_INFO_PTR prob,            /* problem description */
  MESH_INFO_PTR mesh,            /* mesh information for the problem */
  int        gnvtxs,             /* global number of vertices across all procs*/
  int        nvtxs,              /* number of vertices in local graph */
  int       *start,              /* start of edge list for each vertex */
  int       *adj,                /* edge list data */
  int        vwgt_dim,           /* # of weights per vertex */
  float     *vwgts,              /* vertex weight list data */
  int        ewgt_dim,           /* # of weights per edge */
  float     *ewgts,              /* edge weight list data */
  int        ndim,               /* dimension of the geometry */
  float     *x,                  /* x-coordinates of the vertices */
  float     *y,                  /* y-coordinates of the vertices */
  float     *z,                  /* z-coordinates of the vertices */
  short     *assignments,        /* assignments from Chaco file; may be NULL */
  int       base,                /* smallest vertex number to use; 
                                    base == 1 for Chaco; 
                                    may be 0 or 1 for HG files. */
  int       no_geom              /* flag indicating whether coords are avail. */
)
{
const char *yo = "chaco_setup_mesh_struct";
int i;

  DEBUG_TRACE_START(Proc, yo);

  /* Initialize mesh structure for Chaco mesh. */
  mesh->data_type = ZOLTAN_GRAPH;
  mesh->vwgt_dim = vwgt_dim;
  mesh->ewgt_dim = ewgt_dim;
  mesh->num_elems = nvtxs;
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->num_dims = ndim;
  mesh->num_el_blks = 1;

  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = nvtxs;
  mesh->eb_nattrs[0] = 0;

  mesh->hindex = (int *) malloc(sizeof(int));
  mesh->hindex[0] = 0;

  /*
   * Each element has one set of coordinates (i.e., node) if a coords file
   * was provided; zero otherwise. 
   */
  MPI_Bcast( &no_geom, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (no_geom)
    mesh->eb_nnodes[0] = 0;
  else
    mesh->eb_nnodes[0] = 1;

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "chaco");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len 
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * initialize all of the element structs as unused by
   * setting the globalID to -1
   */
  for (i = 0; i < mesh->elem_array_len; i++) 
    initialize_element(&(mesh->elements[i]));

  /*
   * now fill the element structure array with the
   * information from the Chaco file
   */
  if (!chaco_fill_elements(Proc, Num_Proc, prob, mesh, gnvtxs, nvtxs,
                     start, adj, vwgt_dim, vwgts, ewgt_dim, ewgts, 
                     ndim, x, y, z, assignments, base)) {
    Gen_Error(0, "fatal: Error returned from chaco_fill_elements");
    return 0;
  }

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
Example No. 5
void migrate_pre_process(void *data, int num_gid_entries, int num_lid_entries,
                         int num_import,
                         ZOLTAN_ID_PTR import_global_ids,
                         ZOLTAN_ID_PTR import_local_ids, int *import_procs,
                         int *import_to_part,
                         int num_export, ZOLTAN_ID_PTR export_global_ids,
                         ZOLTAN_ID_PTR export_local_ids, int *export_procs,
                         int *export_to_part,
                         int *ierr)
{
    int i, j, k, idx, maxlen, proc, offset;
    int *proc_ids = NULL;   /* Temp array of processor assignments for elements.*/
    char *change = NULL;    /* Temp array indicating whether local element's adj
                           list must be updated due to a nbor's migration.  */
    int new_proc;           /* New processor assignment for nbor element.       */
    int exp_elem;           /* index of an element being exported */
    int bor_elem;           /* index of an element along the processor border */
    int *send_vec = NULL, *recv_vec = NULL;  /* Communication vecs. */
    MESH_INFO_PTR mesh;
    ELEM_INFO_PTR elements;
    int lid = num_lid_entries-1;
    int gid = num_gid_entries-1;
    char msg[256];

    *ierr = ZOLTAN_OK;

    if (data == NULL) {
        *ierr = ZOLTAN_FATAL;
        return;
    }
    mesh = (MESH_INFO_PTR) data;
    elements = mesh->elements;

    for (i=0; i < mesh->num_elems; i++) {
        /* don't migrate a pointer created on this process */
        safe_free((void **)(void *)&(elements[i].adj_blank));
    }

    /*
     *  Set some flags. Assume if true for one element, true for all elements.
     *  Note that some procs may have no elements.
     */

    if (elements[0].edge_wgt != NULL)
        k = 1;
    else
        k = 0;
    /* Make sure all procs have the same value */
    MPI_Allreduce(&k, &Use_Edge_Wgts, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

    /* NOT IMPLEMENTED: blanking information is not sent along.  Subsequent
       lb_eval may be incorrect, since imported elements may have blanked
       adjacencies.

    if (mesh->blank_count > 0)
      k = 1;
    else
      k = 0;

    MPI_Allreduce(&k, &Vertex_Blanking, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

    */

    /*
     *  For all elements, update adjacent elements' processor information.
     *  That way, when we perform migration, we will be migrating updated
     *  adjacency information.
     */

    MPI_Comm_rank(MPI_COMM_WORLD, &proc);

    /*
     *  Build New_Elem_Index array and list of processor assignments.
     */

    New_Elem_Index_Size = mesh->num_elems + num_import - num_export;
    if (mesh->elem_array_len > New_Elem_Index_Size)
        New_Elem_Index_Size = mesh->elem_array_len;
    New_Elem_Index = (int *) malloc(New_Elem_Index_Size * sizeof(int));
    New_Elem_Hash_Table = (int *) malloc(New_Elem_Index_Size * sizeof(int));
    New_Elem_Hash_Nodes = (struct New_Elem_Hash_Node *)
                          malloc(New_Elem_Index_Size * sizeof(struct New_Elem_Hash_Node));

    if (New_Elem_Index == NULL ||
            New_Elem_Hash_Table == NULL || New_Elem_Hash_Nodes == NULL) {
        Gen_Error(0, "fatal: insufficient memory");
        *ierr = ZOLTAN_MEMERR;
        return;
    }

    for (i = 0; i < New_Elem_Index_Size; i++)
        New_Elem_Hash_Table[i] = -1;
    for (i = 0; i < New_Elem_Index_Size; i++) {
        New_Elem_Hash_Nodes[i].globalID = -1;
        New_Elem_Hash_Nodes[i].localID = -1;
        New_Elem_Hash_Nodes[i].next = -1;
    }
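    /* The hash table maps global IDs to slots in New_Elem_Index, so imported
       neighbors can be converted to local IDs without a linear search
       (see find_in_hash in the unload loop below). */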

    if (mesh->num_elems > 0) {

        proc_ids = (int *)  malloc(mesh->num_elems * sizeof(int));
        change   = (char *) malloc(mesh->num_elems * sizeof(char));

        if (New_Elem_Index == NULL || proc_ids == NULL || change == NULL ||
                New_Elem_Hash_Table == NULL || New_Elem_Hash_Nodes == NULL) {
            Gen_Error(0, "fatal: insufficient memory");
            *ierr = ZOLTAN_MEMERR;
            return;
        }

        for (i = 0; i < mesh->num_elems; i++) {
            New_Elem_Index[i] = elements[i].globalID;
            insert_in_hash(elements[i].globalID, i);
            proc_ids[i] = proc;
            change[i] = 0;
        }
    }

    for (i = mesh->num_elems; i < New_Elem_Index_Size; i++) {
        New_Elem_Index[i] = -1;
    }

    for (i = 0; i < num_export; i++) {
        if (num_lid_entries)
            exp_elem = export_local_ids[lid+i*num_lid_entries];
        else  /* testing num_lid_entries == 0 */
            search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries],
                                &exp_elem);

        if (export_procs[i] != proc) {
            /* Export is moving to a new processor */
            New_Elem_Index[exp_elem] = -1;
            remove_from_hash(export_global_ids[gid+i*num_gid_entries]);
            proc_ids[exp_elem] = export_procs[i];
        }
    }

    j = 0;
    for (i = 0; i < num_import; i++) {
        if (import_procs[i] != proc) {
            /* Import is arriving from a different processor, not just moving to a new partition */
            /* search for first free location */
            for ( ; j < New_Elem_Index_Size; j++)
                if (New_Elem_Index[j] == -1) break;

            New_Elem_Index[j] = import_global_ids[gid+i*num_gid_entries];
            insert_in_hash((int) import_global_ids[gid+i*num_gid_entries], j);
        }
    }

    /*
     * Update local information
     */

    /* Set change flag for elements whose adjacent elements are being exported */

    for (i = 0; i < num_export; i++) {

        if (num_lid_entries)
            exp_elem = export_local_ids[lid+i*num_lid_entries];
        else  /* testing num_lid_entries == 0 */
            search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries],
                                &exp_elem);

        elements[exp_elem].my_part = export_to_part[i];

        if (export_procs[i] == proc)
            continue;  /* No adjacency changes needed if export is changing
                    only partition, not processor. */

        for (j = 0; j < elements[exp_elem].adj_len; j++) {

            /* Skip NULL adjacencies (sides that are not adjacent to another elem). */
            if (elements[exp_elem].adj[j] == -1) continue;

            /* Set change flag for adjacent local elements. */
            if (elements[exp_elem].adj_proc[j] == proc) {
                change[elements[exp_elem].adj[j]] = 1;
            }
        }
    }

    /* Change adjacency information in marked elements */
    for (i = 0; i < mesh->num_elems; i++) {
        if (change[i] == 0) continue;

        /* loop over marked element's adjacencies; look for ones that are moving */
        for (j = 0; j < elements[i].adj_len; j++) {

            /* Skip NULL adjacencies (sides that are not adjacent to another elem). */
            if (elements[i].adj[j] == -1) continue;

            if (elements[i].adj_proc[j] == proc) {
                /* adjacent element is local; check whether it is moving. */
                if ((new_proc = proc_ids[elements[i].adj[j]]) != proc) {
                    /* Adjacent element is being exported; update this adjacency entry */
                    elements[i].adj[j] = elements[elements[i].adj[j]].globalID;
                    elements[i].adj_proc[j] = new_proc;
                }
            }
        }
    }
    safe_free((void **)(void *) &change);

    /*
     * Update off-processor information
     */

    maxlen = 0;
    for (i = 0; i < mesh->necmap; i++)
        maxlen += mesh->ecmap_cnt[i];

    if (maxlen > 0) {
        send_vec = (int *) malloc(maxlen * sizeof(int));
        if (send_vec == NULL) {
            Gen_Error(0, "fatal: insufficient memory");
            *ierr = ZOLTAN_MEMERR;
            return;
        }

        /* Load send vector */

        for (i = 0; i < maxlen; i++)
            send_vec[i] = proc_ids[mesh->ecmap_elemids[i]];
    }

    safe_free((void **)(void *) &proc_ids);

    if (maxlen > 0)
        recv_vec = (int *) malloc(maxlen * sizeof(int));

    /*  Perform boundary exchange */

    boundary_exchange(mesh, 1, send_vec, recv_vec);

    /* Unload receive vector */

    offset = 0;
    for (i = 0; i < mesh->necmap; i++) {
        for (j = 0; j < mesh->ecmap_cnt[i]; j++, offset++) {
            if (recv_vec[offset] == mesh->ecmap_id[i]) {
                /* off-processor element is not changing processors.  */
                /* no changes are needed in the local data structure. */
                continue;
            }
            /* Change processor assignment in local element's adjacency list */
            bor_elem = mesh->ecmap_elemids[offset];
            for (k = 0; k < elements[bor_elem].adj_len; k++) {

                /* Skip NULL adjacencies (sides that are not adj to another elem). */
                if (elements[bor_elem].adj[k] == -1) continue;

                if (elements[bor_elem].adj[k] == mesh->ecmap_neighids[offset] &&
                        elements[bor_elem].adj_proc[k] == mesh->ecmap_id[i]) {
                    elements[bor_elem].adj_proc[k] = recv_vec[offset];
                    if (recv_vec[offset] == proc) {
                        /* element is moving to this processor; */
                        /* convert adj from global to local ID. */
                        idx = find_in_hash(mesh->ecmap_neighids[offset]);
                        if (idx >= 0)
                            idx = New_Elem_Hash_Nodes[idx].localID;
                        else {
                            sprintf(msg, "fatal: unable to locate element %d in "
                                    "New_Elem_Index", mesh->ecmap_neighids[offset]);
                            Gen_Error(0, msg);
                            *ierr = ZOLTAN_FATAL;
                            return;
                        }
                        elements[bor_elem].adj[k] = idx;
                    }
                    break;  /* from k loop */
                }
            }
        }
    }

    safe_free((void **)(void *) &recv_vec);
    safe_free((void **)(void *) &send_vec);

    /*
     * Allocate space (if needed) for the new element data.
     */

    if (mesh->elem_array_len < New_Elem_Index_Size) {
        mesh->elem_array_len = New_Elem_Index_Size;
        mesh->elements = (ELEM_INFO_PTR) realloc (mesh->elements,
                         mesh->elem_array_len * sizeof(ELEM_INFO));
        if (mesh->elements == NULL) {
            Gen_Error(0, "fatal: insufficient memory");
            return;
        }

        /* initialize the new spots */
        for (i = mesh->num_elems; i < mesh->elem_array_len; i++)
            initialize_element(&(mesh->elements[i]));
    }
}
Example No. 6
static int setup_mesh_struct(
  int        Proc,
  int        Num_Proc,
  PROB_INFO_PTR prob,            /* problem description */
  MESH_INFO_PTR mesh,            /* mesh information for the problem */
  PARIO_INFO_PTR pio_info,       /* element distribution info*/
  ZOLTAN_ID_TYPE   gnvtxs,             /* global number of vertices across all procs*/
  int        nvtxs,              /* number of vertices in local graph */
  int       *start,              /* start of edge list for each vertex */
  ZOLTAN_ID_TYPE  *adj,                /* edge list data */
  int        vwgt_dim,           /* # of weights per vertex */
  float     *vwgts,              /* vertex weight list data */
  int        ewgt_dim,           /* # of weights per edge */
  float     *ewgts,              /* edge weight list data */
  int        ndim,               /* dimension of the geometry */
  float     *x,                  /* x-coordinates of the vertices */
  float     *y,                  /* y-coordinates of the vertices */
  float     *z                   /* z-coordinates of the vertices */
)
{
const char *yo = "setup_mesh_struct";
int i, j, k;
ZOLTAN_ID_TYPE elem_id;
ZOLTAN_ID_TYPE min_vtx;

  DEBUG_TRACE_START(Proc, yo);

  /* Initialize mesh structure for Chaco mesh. */
  mesh->data_type = ZOLTAN_GRAPH;
  mesh->vwgt_dim = vwgt_dim;
  mesh->ewgt_dim = ewgt_dim;
  mesh->num_elems = nvtxs;
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->num_dims = ndim;
  mesh->num_el_blks = 1;

  mesh->eb_etypes = (int *) malloc (4 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_cnts = (ZOLTAN_ID_TYPE *) malloc (mesh->num_el_blks * sizeof(ZOLTAN_ID_TYPE));
  if (!mesh->eb_cnts) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = (ZOLTAN_ID_TYPE)nvtxs;
  mesh->eb_nattrs[0] = 0;

  mesh->hindex = (int *) malloc(sizeof(int));
  mesh->hindex[0] = 0;
  mesh->eb_nnodes[0] = 1;

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc(16* sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "random-graph");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /* write element data */

  for (i = 0; i < mesh->elem_array_len; i++) 
    initialize_element(&(mesh->elements[i]));

  min_vtx = local_to_global_id_map(0, Proc);
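  /* min_vtx is the global ID of this process's first local vertex; the
     adjacency loop below subtracts it to convert a local neighbor's global
     ID to a local index. */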

  for (i = 0; i < nvtxs; i++) {
    mesh->elements[i].globalID = local_to_global_id_map(i, Proc);
                                                       
    if (vwgts != NULL){
      for (j=0; j<vwgt_dim; j++) {
        mesh->elements[i].cpu_wgt[j] = vwgts[i*vwgt_dim+j];
      }
    }
    else
      mesh->elements[i].cpu_wgt[0] = 1.0;

    mesh->elements[i].elem_blk = 0;
    mesh->elements[i].my_part = Proc;

    if (mesh->num_dims > 0) {
      /* One set of coords per element. */
      mesh->elements[i].connect = (ZOLTAN_ID_TYPE *) malloc(sizeof(ZOLTAN_ID_TYPE));
      mesh->elements[i].connect[0] = mesh->elements[i].globalID;
      mesh->elements[i].coord = (float **) malloc(sizeof(float *));
      mesh->elements[i].coord[0] = (float *) calloc(mesh->num_dims, sizeof(float));  
      mesh->elements[i].coord[0][0] = x[i];
      mesh->elements[i].avg_coord[0] = x[i];
      if (mesh->num_dims > 1) {
        mesh->elements[i].coord[0][1] = y[i];
        mesh->elements[i].avg_coord[1] = y[i];
        if (mesh->num_dims > 2) {
          mesh->elements[i].coord[0][2] = z[i];
          mesh->elements[i].avg_coord[2] = z[i];
        }
      }
    }
  }

  for (i = 0; i < nvtxs; i++) {
    /* now start with the adjacencies */
    if (start != NULL)
      mesh->elements[i].nadj = start[i+1] - start[i];
    else
      mesh->elements[i].nadj = 0;
    if (mesh->elements[i].nadj > 0) {
      mesh->elements[i].adj_len = mesh->elements[i].nadj;
      mesh->elements[i].adj = (ZOLTAN_ID_TYPE *) malloc (mesh->elements[i].nadj * sizeof(ZOLTAN_ID_TYPE));
      mesh->elements[i].adj_proc = (int *) malloc (mesh->elements[i].nadj * sizeof(int));
      if (!(mesh->elements[i].adj) || !(mesh->elements[i].adj_proc)) {
        Gen_Error(0, "fatal: insufficient memory");
        return 0;
      }
      if (ewgts != NULL) {
        mesh->elements[i].edge_wgt = (float *) malloc (mesh->elements[i].nadj * sizeof(float));
        if (!(mesh->elements[i].edge_wgt)) {
          Gen_Error(0, "fatal: insufficient memory");
          return 0;
        }
      }
      else
        mesh->elements[i].edge_wgt = NULL;

      for (j = 0; j < mesh->elements[i].nadj; j++) {
        elem_id = adj[start[i] + j];

        k = global_to_proc_owner_map(elem_id, Num_Proc, Proc);

        /*
         * if the adjacent element is on this processor
         * then find the local id for that element
         */
        if (k == Proc) 
          mesh->elements[i].adj[j] = elem_id-min_vtx;
        else /* use the global id */
          mesh->elements[i].adj[j] = elem_id;

        mesh->elements[i].adj_proc[j] = k;

        if (ewgts != NULL)
          mesh->elements[i].edge_wgt[j] = ewgts[start[i] + j];
      }
    } /* End: "if (mesh->elements[i].nadj > 0)" */
  } /* End: "for (i = 0; i < nvtxs; i++)" */

  if (!build_elem_comm_maps(Proc, mesh)) {
    Gen_Error(0, "Fatal: error building initial elem comm maps");
    return 0;
  }

  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
Example No. 7
/* Read "matrixmarket plus", the format written by Zoltan_Generate_Files.
 *
 * This format is our own extension of the NIST Matrix Market file
 * format.  We wished to store vertex and edge weights, and also
 * pin, vertex weight and edge weight ownership data in the file.
 * Here are some rules from the NIST design document:
 *  1. lines are limited to 1024 characters
 *  2. blank lines may appear anywhere after the first line
 *  3. numeric data on a line is separated by one or more blanks
 *  4. real data is in floating-point decimal format, can use "e" notation
 *  5. all indices are 1-based
 *  6. character data may be upper or lower case.
 *
 * The contents of the file reflect the data returned by the
 * application in the hypergraph query functions.  In particular:
 *
 * Each process supplied some subset of pins to Zoltan.  Each owned
 * some of the vertices and supplied weights for those.  Each may have
 * supplied weights for edges.  The edges need not be the edges of
 * their pins.  More than one process may have supplied a weight for
 * the same edge.
 */
int read_mtxplus_file(
  int Proc,
  int Num_Proc,
  PROB_INFO_PTR prob,
  PARIO_INFO_PTR pio_info,
  MESH_INFO_PTR mesh
)
{
  /* Local declarations. */
  const char  *yo = "read_mtxplus_file";
  char filename[256], cmesg[256];
  struct stat statbuf;
  int rc, fsize, i, j;
  char *filebuf=NULL;
  FILE* fp;
  int nGlobalEdges, nGlobalVtxs, vtxWDim, edgeWDim;
  int nMyPins, nMyVtx, nMyEdgeWgts;
  int *myPinI, *myPinJ, *myVtxNum, *myEWGno;
  float *myVtxWgts, *myEdgeWgts;
  int status;
  int numHEdges;
  int *edgeGno, *edgeIdx, *pinGno;

  DEBUG_TRACE_START(Proc, yo);

  /* Process 0 reads the file and broadcasts it */

  if (Proc == 0) {
    fsize = 0;

    sprintf(filename, "%s.mtxp", pio_info->pexo_fname);
    if (pio_info->file_comp == GZIP)
      strcat(filename, ".gz");  /* append; sprintf with overlapping args is undefined */

    rc = stat(filename, &statbuf);

    if (rc == 0){
      fsize = statbuf.st_size;
      fp = fopen(filename, "r");

      if (!fp){
	fsize = 0;
      }
      else{
	filebuf = (char *)malloc(fsize+1);

	rc = fread(filebuf, 1, fsize, fp);

	if (rc != fsize){
	  free(filebuf);
	  fsize = 0;
	  fp = NULL;
	}
	else{
	  filebuf[fsize] = 0;
	  fsize++;
	}
	fclose(fp);
      }
    }
  }

  MPI_Bcast(&fsize, 1, MPI_INT, 0, MPI_COMM_WORLD);
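  /* Broadcasting fsize == 0 tells every rank that the read failed. */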

  if (fsize == 0) {
    sprintf(cmesg, "fatal:  Could not open/read hypergraph file %s", filename);
    Gen_Error(0, cmesg);
    return 0;
  }

  if (Proc > 0){
    filebuf = (char *)malloc(fsize);
  }

  MPI_Bcast(filebuf, fsize, MPI_BYTE, 0, MPI_COMM_WORLD);

  /* Each process reads through the file, obtaining its
   * pins, vertex weights and edge weights.  The file lists
   * global IDs for the vertices and edges.  These will be
   * assigned global numbers based on the order they appear
   * in the file.  The global numbers begin with zero.
   * Returns 1 on success, 0 on failure.
   */

  rc = process_mtxp_file(pio_info, filebuf, fsize, Num_Proc, Proc,
	  &nGlobalEdges, &nGlobalVtxs, &vtxWDim, &edgeWDim,
	  &nMyPins, &myPinI, &myPinJ,
	  &nMyVtx, &myVtxNum, &myVtxWgts,
	  &nMyEdgeWgts, &myEWGno, &myEdgeWgts);

  free(filebuf);

  MPI_Allreduce(&rc, &status, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
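  /* Each rank returns 1 on success, so the sum equals Num_Proc only if all
     ranks succeeded. */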

  if (status != Num_Proc){
    return 0;
  }

  /*
   * From the lists of pins, create edge lists.  (Unless
   * the initial pin distribution is by column, in which
   * case we will test the hypergraph query interface's
   * ability to accept pins by column rather than row.)
   */

  if (pio_info->init_dist_pins != INITIAL_COL){       /* CRS */
    rc = create_edge_lists(nMyPins, myPinI, myPinJ,
	    &numHEdges, &edgeGno, &edgeIdx, &pinGno);
    mesh->format = ZOLTAN_COMPRESSED_EDGE;
  }
  else{                                               /* CCS */
    /* actually creating vertex lists, since we switched
     * the role of I and J in the argument list.
     */
    rc = create_edge_lists(nMyPins, myPinJ, myPinI,
	    &numHEdges, &edgeGno, &edgeIdx, &pinGno);
    mesh->format = ZOLTAN_COMPRESSED_VERTEX;
  }

  MPI_Allreduce(&rc, &status, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  if (status != Num_Proc){
    return 0;
  }

  safe_free((void **)(void *)&myPinI);
  safe_free((void **)(void *)&myPinJ);

  /* Initialize mesh structure for Hypergraph. */
  mesh->data_type = HYPERGRAPH;
  mesh->num_elems = nMyVtx;
  mesh->vwgt_dim = vtxWDim;
  mesh->ewgt_dim = 0;
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->num_dims = 0;
  mesh->num_el_blks = 1;

  mesh->gnhedges = nGlobalEdges;
  mesh->nhedges = numHEdges;     /* (or num vertices if CCS) */
  mesh->hewgt_dim = edgeWDim;

  mesh->hgid = edgeGno;          /* (or vertex gno if CCS) */
  mesh->hindex = edgeIdx;        /* (or vertex index if CCS) */
  mesh->hvertex = pinGno;        /* (or gno of pin edge if CCS) */
  mesh->hvertex_proc = NULL;     /* don't know don't care */
  mesh->heNumWgts = nMyEdgeWgts;
  mesh->heWgtId = myEWGno;
  mesh->hewgts = myEdgeWgts;

  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = nGlobalVtxs;
  mesh->eb_nattrs[0] = 0;
  mesh->eb_nnodes[0] = 0;

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "hypergraph");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len
					 * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * Write the element structure with the vertices and weights
   */
  for (i = 0; i < mesh->elem_array_len; i++) {
    initialize_element(&(mesh->elements[i]));
    if (i < mesh->num_elems){
      mesh->elements[i].globalID = myVtxNum[i];
      mesh->elements[i].my_part  = Proc;
      for (j=0; j<vtxWDim; j++){
	mesh->elements[i].cpu_wgt[j] = myVtxWgts[i*vtxWDim + j];
      }
    }
  }

  safe_free((void **)(void *) &myVtxWgts);
  safe_free((void **)(void *) &myVtxNum);

  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
Example No. 8
/* Read from file and set up hypergraph. */
int read_hypergraph_file(
  int Proc,
  int Num_Proc,
  PROB_INFO_PTR prob,
  PARIO_INFO_PTR pio_info,
  MESH_INFO_PTR mesh
)
{
  /* Local declarations. */
  const char  *yo = "read_hypergraph_file";
  char   cmesg[256];

  int    i, gnvtxs, distributed_pins = 0, edge, vertex, nextEdge;
  int    nvtxs = 0, gnhedges = 0, nhedges = 0, npins = 0;
  int    vwgt_dim=0, hewgt_dim=0, vtx, edgeSize, global_npins;
  int   *hindex = NULL, *hvertex = NULL, *hvertex_proc = NULL;
  int   *hgid = NULL;
  float *hewgts = NULL, *vwgts = NULL;
  ZOLTAN_FILE* fp = NULL;
  int base = 0;   /* Smallest vertex number; usually zero or one. */
  char filename[256];

  /* Variables that allow graph-based functions to be reused. */
  /* If no chaco.graph or chaco.coords files exist, values are NULL or 0,
   * since graph is not being built. If chaco.graph and/or chaco.coords
   * exist, these arrays are filled and values stored in mesh.
   * Including these files allows for comparison of HG methods with other
   * methods, along with visualization of results and comparison of
   * LB_Eval results.
   */
  int    ch_nvtxs = 0;        /* Temporary values for chaco_read_graph.   */
#ifdef KDDKDD
  int    ch_vwgt_dim = 0;     /* Their values are ignored, as vertex      */
#endif
  float *ch_vwgts = NULL;     /* info is provided by hypergraph file.     */
  int   *ch_start = NULL, *ch_adj = NULL, ch_ewgt_dim = 0;
  short *ch_assignments = NULL;
  float *ch_ewgts = NULL;
  int    ch_ndim = 0;
  float *ch_x = NULL, *ch_y = NULL, *ch_z = NULL;
  int    ch_no_geom = TRUE;   /* Assume no geometry info is given; reset if
				 it is provided. */
  int    file_error = 0;

/***************************** BEGIN EXECUTION ******************************/

  DEBUG_TRACE_START(Proc, yo);

  if (Proc == 0) {

    /* Open and read the hypergraph file. */
    if (pio_info->file_type == HYPERGRAPH_FILE)
      sprintf(filename, "%s.hg", pio_info->pexo_fname);
    else if (pio_info->file_type == MATRIXMARKET_FILE)
      sprintf(filename, "%s.mtx", pio_info->pexo_fname);
    else {
	sprintf(cmesg, "fatal:  invalid file type %d", pio_info->file_type);
	Gen_Error(0, cmesg);
	return 0;
    }

    fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
    file_error = (fp == NULL);
  }



  MPI_Bcast(&file_error, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (file_error) {
    sprintf(cmesg,
      "fatal:  Could not open hypergraph file %s",pio_info->pexo_fname);
    Gen_Error(0, cmesg);
    return 0;
  }

  if (pio_info->file_type == HYPERGRAPH_FILE) {
    /* read the array in on processor 0 */
    if (Proc == 0) {
      if (HG_readfile(Proc, fp, &nvtxs, &nhedges, &npins,
		    &hindex, &hvertex, &vwgt_dim, &vwgts,
		    &hewgt_dim, &hewgts, &base) != 0){
	Gen_Error(0, "fatal: Error returned from HG_readfile");
	return 0;
      }
    }
  }
  else if (pio_info->file_type == MATRIXMARKET_FILE) {
    /*
     * pio_info->chunk_reader == 0  (the usual case)
     *   process 0 will read entire file in MM_readfile,
     *   and will distribute vertices in chaco_dist_graph and pins in
     *   dist_hyperedges later.   (distributed_pins==0)
     *
     * pio_info->chunk_reader == 1  ("initial read = chunks" in zdrive.inp)
     *   process 0 will read the file in chunks, and will send vertices
     *   and pins to other processes before reading the next chunk, all
     *   in MM_readfile.  (distributed_pins==1)
     */

    if (MM_readfile(Proc, Num_Proc, fp, pio_info,
		    &nvtxs,     /* global number of vertices */
		    &nhedges,   /* global number of hyperedges */
		    &npins,     /* local number of pins */
		    &hindex, &hvertex, &vwgt_dim, &vwgts,
		    &hewgt_dim, &hewgts, &ch_start, &ch_adj,
		    &ch_ewgt_dim, &ch_ewgts, &base, &global_npins)) {
      Gen_Error(0, "fatal: Error returned from MM_readfile");
      return 0;
    }

    if (Proc == 0) ZOLTAN_FILE_close(fp);

    if ((Num_Proc > 1) && pio_info->chunk_reader && (global_npins > Num_Proc)){
      distributed_pins = 1;
    }
    else{
      distributed_pins = 0;
    }
  }


#ifdef KDDKDD
 {
   /* If CHACO graph file is available, read it. */

   sprintf(filename, "%s.graph", pio_info->pexo_fname);

   fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
   file_error =
#ifndef ZOLTAN_COMPRESS
     (fp == NULL);
#else
   fp.error;
#endif


   if (!file_error) {
      /* CHACO graph file is available. */
      /* Assuming hypergraph vertices are same as chaco vertices. */
      /* Chaco vertices and their weights are ignored in rest of function. */
      if (chaco_input_graph(fp, filename, &ch_start, &ch_adj, &ch_nvtxs,
		      &ch_vwgt_dim, &ch_vwgts, &ch_ewgt_dim, &ch_ewgts) != 0) {
	Gen_Error(0, "fatal: Error returned from chaco_input_graph");
	return 0;
      }
    }
   else
     ch_nvtxs = nvtxs;


    /* If coordinate file is available, read it. */
   sprintf(filename, "%s.coords", pio_info->pexo_fname);

   fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
   file_error =
#ifndef ZOLTAN_COMPRESS
     (fp == NULL);
#else
   fp.error;
#endif

    if (!file_error) {
      /* CHACO coordinates file is available. */
      ch_no_geom = FALSE;
      if (chaco_input_geom(fp, filename, ch_nvtxs, &ch_ndim,
			   &ch_x, &ch_y, &ch_z) != 0) {
	Gen_Error(0, "fatal: Error returned from chaco_input_geom");
	return 0;
      }
    }
 }
#else /* KDDKDD */
  ch_nvtxs = nvtxs;
#endif /* KDDKDD */


  {
    /* Read Chaco assignment file, if requested */
    if (pio_info->init_dist_type == INITIAL_FILE) {
      sprintf(filename, "%s.assign", pio_info->pexo_fname);

      fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);

      if (fp == NULL) {
        sprintf(cmesg, "Error:  Could not open Chaco assignment file %s; "
                "initial distribution cannot be read",
                filename);
        Gen_Error(0, cmesg);
        return 0;
      }
      else {
        /* read the assignments in on processor 0 */
        ch_assignments = (short *) malloc(nvtxs * sizeof(short));
        if (nvtxs && !ch_assignments) {
          Gen_Error(0, "fatal: memory error in read_hypergraph_file");
          return 0;
        }
        /* closes fp when done */
        if (chaco_input_assign(fp, filename, ch_nvtxs, ch_assignments) != 0) {
          Gen_Error(0, "fatal: Error returned from chaco_input_assign");
          return 0;
        }
      }
    }
  }

  MPI_Bcast(&base, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (distributed_pins){
    gnhedges = nhedges;
    nhedges = 0;
    hewgt_dim = 0;
    hewgts = NULL;
    for (edge=0; edge<gnhedges; edge++){
      edgeSize = hindex[edge+1] - hindex[edge];
      if (edgeSize > 0) nhedges++;
    }
    hgid = (int *)malloc(nhedges * sizeof(int));
    hvertex_proc = (int *)malloc(npins * sizeof(int));
    nextEdge=0;
    vtx=0;
    for (edge=0; edge<gnhedges; edge++){
      edgeSize = hindex[edge+1] - hindex[edge];
      if (edgeSize > 0){
	hgid[nextEdge] = edge+1;
	if (nextEdge < edge){
	  hindex[nextEdge+1] = hindex[nextEdge] + edgeSize;
	}
	for (vertex=0; vertex<edgeSize; vertex++,vtx++){
	  hvertex_proc[vtx] = ch_dist_proc(hvertex[vtx], NULL, 1);
	}
	nextEdge++;
      }
    }
    gnvtxs = nvtxs;
    nvtxs = ch_dist_num_vtx(Proc, NULL);
    if (ch_start){    /* need to include only vertices this process owns */
      for (i=0,vertex=0; i<gnvtxs; i++){
	if ((ch_start[i+1] > ch_start[vertex]) || /* vtx has adjacencies so it's mine */
	    (ch_dist_proc(i, NULL, 0) == Proc))   /* my vtx with no adjacencies */
	  {
	  if (i > vertex){
	    ch_start[vertex+1] = ch_start[i+1];
	  }
	  vertex++;
	}
      }
    }
#if 0
    debug_lists(Proc, Num_Proc, nhedges, hindex, hvertex, hvertex_proc, hgid);
#endif
  } else{

    /* Distribute hypergraph graph */
    /* Use hypergraph vertex information and chaco edge information. */

    if (!chaco_dist_graph(MPI_COMM_WORLD, pio_info, 0, &gnvtxs, &nvtxs,
	     &ch_start, &ch_adj, &vwgt_dim, &vwgts, &ch_ewgt_dim, &ch_ewgts,
	     &ch_ndim, &ch_x, &ch_y, &ch_z, &ch_assignments) != 0) {
      Gen_Error(0, "fatal: Error returned from chaco_dist_graph");
      return 0;
    }

    if (!dist_hyperedges(MPI_COMM_WORLD, pio_info, 0, base, gnvtxs, &gnhedges,
		       &nhedges, &hgid, &hindex, &hvertex, &hvertex_proc,
		       &hewgt_dim, &hewgts, ch_assignments)) {
      Gen_Error(0, "fatal: Error returned from dist_hyperedges");
      return 0;
    }
  }


  /* Initialize mesh structure for Hypergraph. */
  mesh->data_type = HYPERGRAPH;
  mesh->num_elems = nvtxs;
  mesh->vwgt_dim = vwgt_dim;
  mesh->ewgt_dim = ch_ewgt_dim;
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->num_dims = ch_ndim;
  mesh->num_el_blks = 1;

  mesh->gnhedges = gnhedges;
  mesh->nhedges = nhedges;
  mesh->hewgt_dim = hewgt_dim;

  mesh->hgid = hgid;
  mesh->hindex = hindex;
  mesh->hvertex = hvertex;
  mesh->hvertex_proc = hvertex_proc;
  mesh->heNumWgts = nhedges;
  mesh->heWgtId = NULL;
  mesh->hewgts = hewgts;


  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = nvtxs;
  mesh->eb_nattrs[0] = 0;
  /*
   * Each element has one set of coordinates (i.e., node) if a coords file
   * was provided; zero otherwise.
   */
  MPI_Bcast( &ch_no_geom, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (ch_no_geom)
    mesh->eb_nnodes[0] = 0;
  else
    mesh->eb_nnodes[0] = 1;

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "hypergraph");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len
					 * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * initialize all of the element structs as unused by
   * setting the globalID to -1
   */
  for (i = 0; i < mesh->elem_array_len; i++)
    initialize_element(&(mesh->elements[i]));

  /*
   * now fill the element structure array with the
   * information from the Chaco file
   * Use hypergraph vertex information and chaco edge information.
   */
  if (!chaco_fill_elements(Proc, Num_Proc, prob, mesh, gnvtxs, nvtxs,
		     ch_start, ch_adj, vwgt_dim, vwgts, ch_ewgt_dim, ch_ewgts,
		     ch_ndim, ch_x, ch_y, ch_z, ch_assignments, base)) {
    Gen_Error(0, "fatal: Error returned from chaco_fill_elements");
    return 0;
  }
#if 0
  debug_elements(Proc, Num_Proc, mesh->num_elems,mesh->elements);
#endif

  safe_free((void **)(void *) &vwgts);
  safe_free((void **)(void *) &ch_ewgts);
  safe_free((void **)(void *) &ch_vwgts);
  safe_free((void **)(void *) &ch_x);
  safe_free((void **)(void *) &ch_y);
  safe_free((void **)(void *) &ch_z);
  safe_free((void **)(void *) &ch_start);
  safe_free((void **)(void *) &ch_adj);
  safe_free((void **)(void *) &ch_assignments);

  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
Example No. 9
template< typename Scalar , class FixtureType >
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
                     const int global_max_x ,
                     const int global_max_y ,
                     const int global_max_z ,
                     const unsigned uq_count ,
                     const int steps ,
                     const int print_sample )
{
  typedef Scalar                              scalar_type ;
  typedef FixtureType                         fixture_type ;
  typedef typename fixture_type::device_type  device_type ;

  enum { ElementNodeCount = fixture_type::element_node_count };

  const int total_num_steps = steps ;

  const Scalar user_dt = 5.0e-6;
  //const Scalar  end_time = 0.0050;

  // element block parameters
  const Scalar  lin_bulk_visc = 0.0;
  const Scalar  quad_bulk_visc = 0.0;

  // const Scalar  lin_bulk_visc = 0.06;
  // const Scalar  quad_bulk_visc = 1.2;
  // const Scalar  hg_stiffness = 0.0;
  // const Scalar  hg_viscosity = 0.0;
  // const Scalar  hg_stiffness = 0.03;
  // const Scalar  hg_viscosity = 0.001;

  // material properties
  const Scalar youngs_modulus=1.0e6;
  const Scalar poissons_ratio=0.0;
  const Scalar  density = 8.0e-4;

  const comm::Machine machine = mesh.parallel_data_map.machine ;

  PerformanceData perf_data ;

  Kokkos::Impl::Timer wall_clock ;

  //------------------------------------
  // Generate fields

  typedef Fields< scalar_type , device_type > fields_type ;

  fields_type mesh_fields( mesh , uq_count ,
                           lin_bulk_visc ,
                           quad_bulk_visc ,
                           youngs_modulus ,
                           poissons_ratio ,
                           density );

  typename fields_type::node_coords_type::HostMirror
    model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );

  typename fields_type::spatial_precise_view::HostMirror
    displacement_h = Kokkos::create_mirror( mesh_fields.displacement );

  typename fields_type::spatial_precise_view::HostMirror
    velocity_h = Kokkos::create_mirror( mesh_fields.velocity );

  Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );

  //------------------------------------
  // Initialization

  initialize_element( mesh_fields );
  initialize_node(    mesh_fields );

  const Scalar x_bc = global_max_x ;

  // Initial condition on velocity to initiate a pulse along the X axis
  {
    const unsigned X = 0;
    for ( unsigned inode = 0; inode< mesh_fields.num_nodes; ++inode) {
      for ( unsigned kq = 0 ; kq < uq_count ; ++kq ) {
        if ( model_coords_h(inode,X) == 0 ) {
          velocity_h(inode,kq,X) = 1000 + 100 * kq ;
        }
      }
    }
  }

  Kokkos::deep_copy( mesh_fields.velocity , velocity_h );
  Kokkos::deep_copy( mesh_fields.velocity_new , velocity_h );

  //--------------------------------------------------------------------------
  // We will call a sequence of functions.  These functions have been
  // grouped into several functors to balance the number of global memory
  // accesses versus requiring too many registers or too much L1 cache.
  // Global memory accesses have read/write cost and memory subsystem contention cost.
  //--------------------------------------------------------------------------

  perf_data.init_time = comm::max( machine , wall_clock.seconds() );

  // Parameters required for the internal force computations.

  perf_data.number_of_steps = total_num_steps ;

  typedef typename
    fields_type::spatial_precise_view::scalar_type  comm_value_type ;

  const unsigned comm_value_count = 6 ;
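  // Six values per node per UQ sample: { dx, dy, dz, vx, vy, vz };
  // see the pack_state/unpack_state calls below.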

  Kokkos::AsyncExchange< comm_value_type , device_type ,
                              Kokkos::ParallelDataMap >
    comm_exchange( mesh.parallel_data_map , comm_value_count * uq_count );

  for ( int step = 0; step < total_num_steps; ++step ) {

    //------------------------------------------------------------------------
    // rotate the state variable views.

    swap( mesh_fields.dt ,           mesh_fields.dt_new );
    swap( mesh_fields.displacement , mesh_fields.displacement_new );
    swap( mesh_fields.velocity ,     mesh_fields.velocity_new );
    swap( mesh_fields.rotation ,     mesh_fields.rotation_new );

    //------------------------------------------------------------------------
    // Communicate "send" nodes' displacement and velocity next_state
    // to the ghosted nodes.
    // buffer packages: { { dx , dy , dz , vx , vy , vz }_node }

    wall_clock.reset();

    pack_state( mesh_fields ,
                comm_exchange.buffer(),
                mesh.parallel_data_map.count_interior ,
                mesh.parallel_data_map.count_send );

    comm_exchange.setup();

    comm_exchange.send_receive();

    unpack_state( mesh_fields ,
                  comm_exchange.buffer() ,
                  mesh.parallel_data_map.count_owned ,
                  mesh.parallel_data_map.count_receive );

    device_type::fence();

    perf_data.comm_time += comm::max( machine , wall_clock.seconds() );

    //------------------------------------------------------------------------

    wall_clock.reset();

    // First kernel 'grad_hgop' combines two functions:
    // gradient, velocity gradient
    gradient( mesh_fields );

    // Combine tensor decomposition and rotation functions.
    decomp_rotate( mesh_fields );

    internal_force( mesh_fields , user_dt );

    device_type::fence();

    perf_data.internal_force_time +=
      comm::max( machine , wall_clock.seconds() );

    //------------------------------------------------------------------------
    // Assembly of elements' contributions to nodal force into
    // a nodal force vector.  Update the accelerations, velocities,
    // displacements.
    // The same pattern can be used for matrix-free residual computations.

    wall_clock.reset();

    nodal_update( mesh_fields , x_bc );

    device_type::fence();

    perf_data.central_diff +=
      comm::max( machine , wall_clock.seconds() );

    if ( print_sample && 0 == step % 100 ) {
      Kokkos::deep_copy( displacement_h , mesh_fields.displacement_new );
      Kokkos::deep_copy( velocity_h ,     mesh_fields.velocity_new );

      if ( 1 == print_sample ) {
        for ( unsigned kp = 0 ; kp < uq_count ; ++kp ) {
          std::cout << "step " << step
                    << " : displacement({*,0,0}," << kp << ",0) =" ;
          for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
            if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
                std::cout << " " << displacement_h(i,kp,0);
            }
          }
          std::cout << std::endl ;

          const float tol = 1.0e-6 ;
          const int yb = global_max_y ;
          const int zb = global_max_z ;
          std::cout << "step " << step
                    << " : displacement({*," << yb << "," << zb << "}," << kp << ",0) =" ;
          for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
            if ( fabs( model_coords_h(i,1) - yb ) < tol &&
                 fabs( model_coords_h(i,2) - zb ) < tol ) {
              std::cout << " " << displacement_h(i,kp,0);
            }
          }
          std::cout << std::endl ;
        }
      }
      else if ( 2 == print_sample ) {
        const unsigned kp = 0 ;

        const float tol = 1.0e-6 ;
        const int xb = global_max_x / 2 ;
        const int yb = global_max_y / 2 ;
        const int zb = global_max_z / 2 ;

        for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
          if ( fabs( model_coords_h(i,0) - xb ) < tol &&
               fabs( model_coords_h(i,1) - yb ) < tol &&
               fabs( model_coords_h(i,2) - zb ) < tol ) {
            std::cout << "step " << step
                      << " : displacement("
                      << xb << "," << yb << "," << zb << ") = {" 
                      << std::setprecision(6)
                      << " " << displacement_h(i,kp,0)
                      << std::setprecision(2)
                      << " " << displacement_h(i,kp,1)
                      << std::setprecision(2)
                      << " " << displacement_h(i,kp,2)
                      << " }" << std::endl ;
          }
        }
      }
    }
  }

  return perf_data ;
}