int main(int argc, char **argv) { // allocate 4 times as many elements as we need (sparse-ish data) element_t * elements = (element_t *)malloc(NUM_ELEMENTS * 4 * sizeof(element_t)); // allocate two arrays of pointers to elements unsigned num_elements = NUM_ELEMENTS; element_t **list = (element_t **)malloc(num_elements * sizeof(element_t *)); element_t **list2 = (element_t **)malloc(num_elements * sizeof(element_t *)); // point list entries at random elements for (int i = 0 ; i < num_elements ; i ++) { int index = random() % (NUM_ELEMENTS * 4); list[i] = &elements[index]; initialize_element(list[i]); } vp_t *vp = get_a_vp(); // iteratively process the elements for (int step = 0 ; step < NUM_STEPS ; step ++) { // pick two random elements element_t *e_cull = list[random() % num_elements]; element_t *e_update = list[random() % num_elements]; // select those elements close to the element of interest unsigned num_elements2 = 0; for (int i = 0 ; i < num_elements ; i ++) { if (!cull(list[i], e_cull)) { list2[num_elements2] = list[i]; num_elements2 ++; } } for (int i = 0 ; i < num_elements2 ; i ++) { transform(list2[i], vp); } // update based on the update element for (int i = 0 ; i < num_elements2 ; i ++) { update(list2[i], e_update); } } }
void migrate_pre_process(void *data, int num_gid_entries, int num_lid_entries, int num_import, ZOLTAN_ID_PTR import_global_ids, ZOLTAN_ID_PTR import_local_ids, int *import_procs, int *import_to_part, int num_export, ZOLTAN_ID_PTR export_global_ids, ZOLTAN_ID_PTR export_local_ids, int *export_procs, int *export_to_part, int *ierr) { int lid = num_lid_entries-1; int gid = num_gid_entries-1; char msg[256]; *ierr = ZOLTAN_OK; if (data == NULL) { *ierr = ZOLTAN_FATAL; return; } MESH_INFO_PTR mesh = (MESH_INFO_PTR) data; ELEM_INFO_PTR elements = mesh->elements; /* * Set some flags. Assume if true for one element, true for all elements. * Note that some procs may have no elements. */ int k = 0; if (elements[0].edge_wgt != NULL) k = 1; /* Make sure all procs have the same value */ MPI_Allreduce(&k, &Use_Edge_Wgts, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); /* * For all elements, update adjacent elements' processor information. * That way, when perform migration, will be migrating updated adjacency * information. */ int proc = 0; MPI_Comm_rank(MPI_COMM_WORLD, &proc); /* * Build New_Elem_Index array and list of processor assignments. 
*/ New_Elem_Index_Size = mesh->num_elems + num_import - num_export; if (mesh->elem_array_len > New_Elem_Index_Size) New_Elem_Index_Size = mesh->elem_array_len; New_Elem_Index = new ZOLTAN_ID_TYPE [New_Elem_Index_Size]; int *proc_ids = NULL; char *change = NULL; if (mesh->num_elems > 0) { proc_ids = new int [mesh->num_elems]; change = new char [mesh->num_elems]; if (New_Elem_Index == NULL || proc_ids == NULL || change == NULL) { Gen_Error(0, "fatal: insufficient memory"); *ierr = ZOLTAN_MEMERR; if (proc_ids) delete [] proc_ids; if (change) delete [] change; if (New_Elem_Index) { delete [] New_Elem_Index; New_Elem_Index = NULL; } return; } for (int i = 0; i < mesh->num_elems; i++) { New_Elem_Index[i] = elements[i].globalID; proc_ids[i] = proc; change[i] = 0; } } for (int i = mesh->num_elems; i < New_Elem_Index_Size; i++) { New_Elem_Index[i] = ZOLTAN_ID_INVALID; } for (int i = 0; i < num_export; i++) { int exp_elem = 0; if (num_lid_entries) exp_elem = export_local_ids[lid+i*num_lid_entries]; else /* testing num_lid_entries == 0 */ search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries], &exp_elem); if (export_procs[i] != proc) { /* Export is moving to a new processor */ New_Elem_Index[exp_elem] = ZOLTAN_ID_INVALID; proc_ids[exp_elem] = export_procs[i]; } } for (int i = 0; i < num_import; i++) { if (import_procs[i] != proc) { /* Import is moving from a new processor, not just from a new partition */ /* search for first free location */ int j=0; for (j = 0; j < New_Elem_Index_Size; j++) if (New_Elem_Index[j] == ZOLTAN_ID_INVALID) break; New_Elem_Index[j] = import_global_ids[gid+i*num_gid_entries]; } } /* * Update local information */ /* Set change flag for elements whose adjacent elements are being exported */ for (int i = 0; i < num_export; i++) { int exp_elem = 0; if (num_lid_entries) exp_elem = export_local_ids[lid+i*num_lid_entries]; else /* testing num_lid_entries == 0 */ search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries], &exp_elem); 
elements[exp_elem].my_part = export_to_part[i]; if (export_procs[i] == proc) continue; /* No adjacency changes needed if export is changing only partition, not processor. */ for (int j = 0; j < elements[exp_elem].adj_len; j++) { /* Skip NULL adjacencies (sides that are not adjacent to another elem). */ if (elements[exp_elem].adj[j] == ZOLTAN_ID_INVALID) continue; /* Set change flag for adjacent local elements. */ if (elements[exp_elem].adj_proc[j] == proc) { change[elements[exp_elem].adj[j]] = 1; } } } /* Change adjacency information in marked elements */ for (int i = 0; i < mesh->num_elems; i++) { if (change[i] == 0) continue; /* loop over marked element's adjacencies; look for ones that are moving */ for (int j = 0; j < elements[i].adj_len; j++) { /* Skip NULL adjacencies (sides that are not adjacent to another elem). */ if (elements[i].adj[j] == ZOLTAN_ID_INVALID) continue; if (elements[i].adj_proc[j] == proc) { /* adjacent element is local; check whether it is moving. */ int new_proc = proc_ids[elements[i].adj[j]]; if (new_proc != proc) { /* Adjacent element is being exported; update this adjacency entry */ elements[i].adj[j] = elements[elements[i].adj[j]].globalID; elements[i].adj_proc[j] = new_proc; } } } } delete [] change; /* * Update off-processor information */ int maxlen = 0; int *send_vec = NULL; for (int i = 0; i < mesh->necmap; i++) maxlen += mesh->ecmap_cnt[i]; if (maxlen > 0) { send_vec = new int [maxlen]; if (send_vec == NULL) { Gen_Error(0, "fatal: insufficient memory"); *ierr = ZOLTAN_MEMERR; delete [] proc_ids; delete [] change; return; } /* Load send vector */ for (int i = 0; i < maxlen; i++) send_vec[i] = proc_ids[mesh->ecmap_elemids[i]]; } delete [] proc_ids; int *recv_vec = NULL; if (maxlen > 0) recv_vec = new int [maxlen]; /* Perform boundary exchange */ boundary_exchange(mesh, 1, send_vec, recv_vec); /* Unload receive vector */ int offset = 0; for (int i = 0; i < mesh->necmap; i++) { for (int j = 0; j < mesh->ecmap_cnt[i]; j++, offset++) { 
if (recv_vec[offset] == mesh->ecmap_id[i]) { /* off-processor element is not changing processors. */ /* no changes are needed in the local data structure. */ continue; } /* Change processor assignment in local element's adjacency list */ int bor_elem = mesh->ecmap_elemids[offset]; for (k = 0; k < elements[bor_elem].adj_len; k++) { /* Skip NULL adjacencies (sides that are not adj to another elem). */ if (elements[bor_elem].adj[k] == ZOLTAN_ID_INVALID) continue; if (elements[bor_elem].adj[k] == mesh->ecmap_neighids[offset] && elements[bor_elem].adj_proc[k] == mesh->ecmap_id[i]) { elements[bor_elem].adj_proc[k] = recv_vec[offset]; if (recv_vec[offset] == proc) { /* element is moving to this processor; */ /* convert adj from global to local ID. */ int idx = in_list(mesh->ecmap_neighids[offset],New_Elem_Index_Size, New_Elem_Index); if (idx == -1) { sprintf(msg, "fatal: unable to locate element " ZOLTAN_ID_SPEC " in " "New_Elem_Index", mesh->ecmap_neighids[offset]); Gen_Error(0, msg); *ierr = ZOLTAN_FATAL; if (send_vec) delete [] send_vec; if (recv_vec) delete [] recv_vec; return; } elements[bor_elem].adj[k] = idx; } break; /* from k loop */ } } } } if (recv_vec) delete [] recv_vec; if (send_vec) delete [] send_vec; /* * Allocate space (if needed) for the new element data. */ if (mesh->elem_array_len < New_Elem_Index_Size) { mesh->elem_array_len = New_Elem_Index_Size; // We don't use C++ new/delete here, because this was malloc'd // in some C code. mesh->elements = (ELEM_INFO_PTR) realloc (mesh->elements, mesh->elem_array_len * sizeof(ELEM_INFO)); if (mesh->elements == NULL) { Gen_Error(0, "fatal: insufficient memory"); return; } /* initialize the new spots */ for (int i = mesh->num_elems; i < mesh->elem_array_len; i++) initialize_element(&(mesh->elements[i])); } }
/* Read this processor's parallel Nemesis/Exodus II mesh file and populate
 * the driver's mesh structure.
 *
 * Proc, Num_Proc -- this processor's rank and total processor count; used
 *                   to generate the per-processor file name.
 * prob           -- problem description, passed through to the element and
 *                   communication-map readers.
 * pio_info       -- parallel I/O info; pexo_fname is the base file name.
 * mesh           -- output: mesh sizes, element-block arrays, elements.
 *
 * Returns 1 on success, 0 on failure (message reported via Gen_Error).
 * NOTE(review): several early-error returns leave partially allocated
 * mesh arrays behind; presumably the driver aborts on failure -- confirm.
 */
int read_exoII_file(int Proc, int Num_Proc, PROB_INFO_PTR prob,
                    PARIO_INFO_PTR pio_info, MESH_INFO_PTR mesh)
{
#ifndef ZOLTAN_NEMESIS
  Gen_Error(0, "Fatal: Nemesis requested but not linked with driver.");
  return 0;
#else /* ZOLTAN_NEMESIS */
  /* Local declarations. */
  char *yo = "read_exoII_mesh";   /* trace tag; NOTE(review): differs from
                                     the function name read_exoII_file */
  char par_nem_fname[FILENAME_MAX+1], title[MAX_LINE_LENGTH+1];
  char cmesg[256];
  float ver;
  int i, pexoid, cpu_ws = 0, io_ws = 0;
  int *nnodes = NULL, *etypes = NULL;
#ifdef DEBUG_EXO
  int j, k, elem;
#endif
  FILE *fdtmp;

/***************************** BEGIN EXECUTION ******************************/

  DEBUG_TRACE_START(Proc, yo);

  /* since this is a test driver, set error reporting in exodus */
  ex_opts(EX_VERBOSE | EX_DEBUG);

  /* generate the parallel filename for this processor */
  gen_par_filename(pio_info->pexo_fname, par_nem_fname, pio_info, Proc,
                   Num_Proc);

  /*
   * check whether parallel file exists.  do the check with fopen
   * as ex_open coredumps on the paragon when files do not exist.
   */
  if ((fdtmp = fopen(par_nem_fname, "r")) == NULL) {
    sprintf(cmesg,"fatal: parallel Exodus II file %s does not exist",
            par_nem_fname);
    Gen_Error(0, cmesg);
    return 0;
  }
  else
    fclose(fdtmp);

  /*
   * now open the existing parallel file using Exodus calls.
   */
  if ((pexoid = ex_open(par_nem_fname, EX_READ, &cpu_ws, &io_ws,
                        &ver)) < 0) {
    sprintf(cmesg,"fatal: could not open parallel Exodus II file %s",
            par_nem_fname);
    Gen_Error(0, cmesg);
    return 0;
  }

  /* and get initial information */
  if (ex_get_init(pexoid, title, &(mesh->num_dims),
                  &(mesh->num_nodes), &(mesh->num_elems),
                  &(mesh->num_el_blks), &(mesh->num_node_sets),
                  &(mesh->num_side_sets)) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_get_init");
    return 0;
  }

  /* allocate some memory for the element blocks */
  mesh->data_type = MESH;
  mesh->vwgt_dim = 1;  /* One weight for now. */
  mesh->ewgt_dim = 1;  /* One weight for now. */

  /* Single allocation backs five parallel int arrays; the pointers below
   * are offsets into it, so freeing eb_etypes releases them all. */
  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->hindex = (int *) malloc(sizeof(int));
  mesh->hindex[0] = 0;

  if (ex_get_elem_blk_ids(pexoid, mesh->eb_ids) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_get_elem_blk_ids");
    return 0;
  }

  /* allocate temporary storage for items needing global reduction.   */
  /* nemesis does not store most element block info about blocks for  */
  /* which the processor owns no elements.                            */
  /* we, however, use this information in migration, so we need to    */
  /* accumulate it for all element blocks.    kdd 2/2001              */
  if (mesh->num_el_blks > 0) {
    /* one allocation: nnodes is the first half, etypes the second */
    nnodes = (int *) malloc(2 * mesh->num_el_blks * sizeof(int));
    if (!nnodes) {
      Gen_Error(0, "fatal: insufficient memory");
      return 0;
    }
    etypes = nnodes + mesh->num_el_blks;
  }

  /* get the element block information */
  for (i = 0; i < mesh->num_el_blks; i++) {

    /* allocate space for name */
    mesh->eb_names[i] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
    if (!mesh->eb_names[i]) {
      Gen_Error(0, "fatal: insufficient memory");
      return 0;
    }

    if (ex_get_elem_block(pexoid, mesh->eb_ids[i], mesh->eb_names[i],
                          &(mesh->eb_cnts[i]), &(nnodes[i]),
                          &(mesh->eb_nattrs[i])) < 0) {
      Gen_Error(0, "fatal: Error returned from ex_get_elem_block");
      return 0;
    }

    if (mesh->eb_cnts[i] > 0) {
      if ((etypes[i] = (int) get_elem_type(mesh->eb_names[i],
                                           nnodes[i],
                                           mesh->num_dims)) == E_TYPE_ERROR) {
        Gen_Error(0, "fatal: could not get element type");
        return 0;
      }
    }
    else
      /* this proc owns no elements of the block; type resolved by the
       * MPI_MIN reduction below */
      etypes[i] = (int) NULL_EL;
  }

  /* Perform reduction on necessary fields of element blocks.  kdd 2/2001 */
  MPI_Allreduce(nnodes, mesh->eb_nnodes, mesh->num_el_blks, MPI_INT,
                MPI_MAX, MPI_COMM_WORLD);
  MPI_Allreduce(etypes, mesh->eb_etypes, mesh->num_el_blks, MPI_INT,
                MPI_MIN, MPI_COMM_WORLD);
  for (i = 0; i < mesh->num_el_blks; i++) {
    strcpy(mesh->eb_names[i], get_elem_name(mesh->eb_etypes[i]));
  }
  free(nnodes);   /* also releases etypes (same allocation) */

  /*
   * allocate memory for the elements
   * allocate a little extra for element migration latter
   */
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * intialize all of the element structs as unused by
   * setting the globalID to -1
   */
  for (i = 0; i < mesh->elem_array_len; i++)
    initialize_element(&(mesh->elements[i]));

  /* read the information for the individual elements */
  if (!read_elem_info(pexoid, Proc, prob, mesh)) {
    Gen_Error(0, "fatal: Error returned from read_elem_info");
    return 0;
  }

  /* read the communication information */
  if (!read_comm_map_info(pexoid, Proc, prob, mesh)) {
    Gen_Error(0, "fatal: Error returned from read_comm_map_info");
    return 0;
  }

  /* Close the parallel file */
  if(ex_close (pexoid) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_close");
    return 0;
  }

  /* print out the distributed mesh */
  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
#endif /* ZOLTAN_NEMESIS */
}
int chaco_setup_mesh_struct( int Proc, int Num_Proc, PROB_INFO_PTR prob, /* problem description */ MESH_INFO_PTR mesh, /* mesh information for the problem */ int gnvtxs, /* global number of vertices across all procs*/ int nvtxs, /* number of vertices in local graph */ int *start, /* start of edge list for each vertex */ int *adj, /* edge list data */ int vwgt_dim, /* # of weights per vertex */ float *vwgts, /* vertex weight list data */ int ewgt_dim, /* # of weights per edge */ float *ewgts, /* edge weight list data */ int ndim, /* dimension of the geometry */ float *x, /* x-coordinates of the vertices */ float *y, /* y-coordinates of the vertices */ float *z, /* z-coordinates of the vertices */ short *assignments, /* assignments from Chaco file; may be NULL */ int base, /* smallest vertex number to use; base == 1 for Chaco; may be 0 or 1 for HG files. */ int no_geom /* flag indicating whether coords are avail. */ ) { const char *yo = "chaco_setup_mesh_struct"; int i; DEBUG_TRACE_START(Proc, yo); /* Initialize mesh structure for Chaco mesh. 
*/ mesh->data_type = ZOLTAN_GRAPH; mesh->vwgt_dim = vwgt_dim; mesh->ewgt_dim = ewgt_dim; mesh->num_elems = nvtxs; mesh->elem_array_len = mesh->num_elems + 5; mesh->num_dims = ndim; mesh->num_el_blks = 1; mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int)); if (!mesh->eb_etypes) { Gen_Error(0, "fatal: insufficient memory"); return 0; } mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks; mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks; mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks; mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks; mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *)); if (!mesh->eb_names) { Gen_Error(0, "fatal: insufficient memory"); return 0; } mesh->eb_etypes[0] = -1; mesh->eb_ids[0] = 1; mesh->eb_cnts[0] = nvtxs; mesh->eb_nattrs[0] = 0; mesh->hindex = (int *) malloc(sizeof(int)); mesh->hindex[0] = 0; /* * Each element has one set of coordinates (i.e., node) if a coords file * was provided; zero otherwise. */ MPI_Bcast( &no_geom, 1, MPI_INT, 0, MPI_COMM_WORLD); if (no_geom) mesh->eb_nnodes[0] = 0; else mesh->eb_nnodes[0] = 1; /* allocate space for name */ mesh->eb_names[0] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char)); if (!mesh->eb_names[0]) { Gen_Error(0, "fatal: insufficient memory"); return 0; } strcpy(mesh->eb_names[0], "chaco"); /* allocate the element structure array */ mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len * sizeof(ELEM_INFO)); if (!(mesh->elements)) { Gen_Error(0, "fatal: insufficient memory"); return 0; } /* * intialize all of the element structs as unused by * setting the globalID to -1 */ for (i = 0; i < mesh->elem_array_len; i++) initialize_element(&(mesh->elements[i])); /* * now fill the element structure array with the * information from the Chaco file */ if (!chaco_fill_elements(Proc, Num_Proc, prob, mesh, gnvtxs, nvtxs, start, adj, vwgt_dim, vwgts, ewgt_dim, ewgts, ndim, x, y, z, assignments, 1)) { Gen_Error(0, "fatal: Error returned from 
chaco_fill_elements"); return 0; } DEBUG_TRACE_END(Proc, yo); return 1; }
void migrate_pre_process(void *data, int num_gid_entries, int num_lid_entries, int num_import, ZOLTAN_ID_PTR import_global_ids, ZOLTAN_ID_PTR import_local_ids, int *import_procs, int *import_to_part, int num_export, ZOLTAN_ID_PTR export_global_ids, ZOLTAN_ID_PTR export_local_ids, int *export_procs, int *export_to_part, int *ierr) { int i, j, k, idx, maxlen, proc, offset; int *proc_ids = NULL; /* Temp array of processor assignments for elements.*/ char *change = NULL; /* Temp array indicating whether local element's adj list must be updated due to a nbor's migration. */ int new_proc; /* New processor assignment for nbor element. */ int exp_elem; /* index of an element being exported */ int bor_elem; /* index of an element along the processor border */ int *send_vec = NULL, *recv_vec = NULL; /* Communication vecs. */ MESH_INFO_PTR mesh; ELEM_INFO_PTR elements; int lid = num_lid_entries-1; int gid = num_gid_entries-1; char msg[256]; *ierr = ZOLTAN_OK; if (data == NULL) { *ierr = ZOLTAN_FATAL; return; } mesh = (MESH_INFO_PTR) data; elements = mesh->elements; for (i=0; i < mesh->num_elems; i++) { /* don't migrate a pointer created on this process */ safe_free((void **)(void *)&(elements[i].adj_blank)); } /* * Set some flags. Assume if true for one element, true for all elements. * Note that some procs may have no elements. */ if (elements[0].edge_wgt != NULL) k = 1; else k = 0; /* Make sure all procs have the same value */ MPI_Allreduce(&k, &Use_Edge_Wgts, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); /* NOT IMPLEMENTED: blanking information is not sent along. Subsequent lb_eval may be incorrect, since imported elements may have blanked adjacencies. if (mesh->blank_count > 0) k = 1; else k = 0; MPI_Allreduce(&k, &Vertex_Blanking, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); */ /* * For all elements, update adjacent elements' processor information. * That way, when perform migration, will be migrating updated adjacency * information. 
*/ MPI_Comm_rank(MPI_COMM_WORLD, &proc); /* * Build New_Elem_Index array and list of processor assignments. */ New_Elem_Index_Size = mesh->num_elems + num_import - num_export; if (mesh->elem_array_len > New_Elem_Index_Size) New_Elem_Index_Size = mesh->elem_array_len; New_Elem_Index = (int *) malloc(New_Elem_Index_Size * sizeof(int)); New_Elem_Hash_Table = (int *) malloc(New_Elem_Index_Size * sizeof(int)); New_Elem_Hash_Nodes = (struct New_Elem_Hash_Node *) malloc(New_Elem_Index_Size * sizeof(struct New_Elem_Hash_Node)); if (New_Elem_Index == NULL || New_Elem_Hash_Table == NULL || New_Elem_Hash_Nodes == NULL) { Gen_Error(0, "fatal: insufficient memory"); *ierr = ZOLTAN_MEMERR; return; } for (i = 0; i < New_Elem_Index_Size; i++) New_Elem_Hash_Table[i] = -1; for (i = 0; i < New_Elem_Index_Size; i++) { New_Elem_Hash_Nodes[i].globalID = -1; New_Elem_Hash_Nodes[i].localID = -1; New_Elem_Hash_Nodes[i].next = -1; } if (mesh->num_elems > 0) { proc_ids = (int *) malloc(mesh->num_elems * sizeof(int)); change = (char *) malloc(mesh->num_elems * sizeof(char)); if (New_Elem_Index == NULL || proc_ids == NULL || change == NULL || New_Elem_Hash_Table == NULL || New_Elem_Hash_Nodes == NULL) { Gen_Error(0, "fatal: insufficient memory"); *ierr = ZOLTAN_MEMERR; return; } for (i = 0; i < mesh->num_elems; i++) { New_Elem_Index[i] = elements[i].globalID; insert_in_hash(elements[i].globalID, i); proc_ids[i] = proc; change[i] = 0; } } for (i = mesh->num_elems; i < New_Elem_Index_Size; i++) { New_Elem_Index[i] = -1; } for (i = 0; i < num_export; i++) { if (num_lid_entries) exp_elem = export_local_ids[lid+i*num_lid_entries]; else /* testing num_lid_entries == 0 */ search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries], &exp_elem); if (export_procs[i] != proc) { /* Export is moving to a new processor */ New_Elem_Index[exp_elem] = -1; remove_from_hash(export_global_ids[gid+i*num_gid_entries]); proc_ids[exp_elem] = export_procs[i]; } } j = 0; for (i = 0; i < num_import; i++) { if 
(import_procs[i] != proc) { /* Import is moving from a new processor, not just from a new partition */ /* search for first free location */ for ( ; j < New_Elem_Index_Size; j++) if (New_Elem_Index[j] == -1) break; New_Elem_Index[j] = import_global_ids[gid+i*num_gid_entries]; insert_in_hash((int) import_global_ids[gid+i*num_gid_entries], j); } } /* * Update local information */ /* Set change flag for elements whose adjacent elements are being exported */ for (i = 0; i < num_export; i++) { if (num_lid_entries) exp_elem = export_local_ids[lid+i*num_lid_entries]; else /* testing num_lid_entries == 0 */ search_by_global_id(mesh, export_global_ids[gid+i*num_gid_entries], &exp_elem); elements[exp_elem].my_part = export_to_part[i]; if (export_procs[i] == proc) continue; /* No adjacency changes needed if export is changing only partition, not processor. */ for (j = 0; j < elements[exp_elem].adj_len; j++) { /* Skip NULL adjacencies (sides that are not adjacent to another elem). */ if (elements[exp_elem].adj[j] == -1) continue; /* Set change flag for adjacent local elements. */ if (elements[exp_elem].adj_proc[j] == proc) { change[elements[exp_elem].adj[j]] = 1; } } } /* Change adjacency information in marked elements */ for (i = 0; i < mesh->num_elems; i++) { if (change[i] == 0) continue; /* loop over marked element's adjacencies; look for ones that are moving */ for (j = 0; j < elements[i].adj_len; j++) { /* Skip NULL adjacencies (sides that are not adjacent to another elem). */ if (elements[i].adj[j] == -1) continue; if (elements[i].adj_proc[j] == proc) { /* adjacent element is local; check whether it is moving. 
*/ if ((new_proc = proc_ids[elements[i].adj[j]]) != proc) { /* Adjacent element is being exported; update this adjacency entry */ elements[i].adj[j] = elements[elements[i].adj[j]].globalID; elements[i].adj_proc[j] = new_proc; } } } } safe_free((void **)(void *) &change); /* * Update off-processor information */ maxlen = 0; for (i = 0; i < mesh->necmap; i++) maxlen += mesh->ecmap_cnt[i]; if (maxlen > 0) { send_vec = (int *) malloc(maxlen * sizeof(int)); if (send_vec == NULL) { Gen_Error(0, "fatal: insufficient memory"); *ierr = ZOLTAN_MEMERR; return; } /* Load send vector */ for (i = 0; i < maxlen; i++) send_vec[i] = proc_ids[mesh->ecmap_elemids[i]]; } safe_free((void **)(void *) &proc_ids); if (maxlen > 0) recv_vec = (int *) malloc(maxlen * sizeof(int)); /* Perform boundary exchange */ boundary_exchange(mesh, 1, send_vec, recv_vec); /* Unload receive vector */ offset = 0; for (i = 0; i < mesh->necmap; i++) { for (j = 0; j < mesh->ecmap_cnt[i]; j++, offset++) { if (recv_vec[offset] == mesh->ecmap_id[i]) { /* off-processor element is not changing processors. */ /* no changes are needed in the local data structure. */ continue; } /* Change processor assignment in local element's adjacency list */ bor_elem = mesh->ecmap_elemids[offset]; for (k = 0; k < elements[bor_elem].adj_len; k++) { /* Skip NULL adjacencies (sides that are not adj to another elem). */ if (elements[bor_elem].adj[k] == -1) continue; if (elements[bor_elem].adj[k] == mesh->ecmap_neighids[offset] && elements[bor_elem].adj_proc[k] == mesh->ecmap_id[i]) { elements[bor_elem].adj_proc[k] = recv_vec[offset]; if (recv_vec[offset] == proc) { /* element is moving to this processor; */ /* convert adj from global to local ID. 
*/ idx = find_in_hash(mesh->ecmap_neighids[offset]); if (idx >= 0) idx = New_Elem_Hash_Nodes[idx].localID; else { sprintf(msg, "fatal: unable to locate element %d in " "New_Elem_Index", mesh->ecmap_neighids[offset]); Gen_Error(0, msg); *ierr = ZOLTAN_FATAL; return; } elements[bor_elem].adj[k] = idx; } break; /* from k loop */ } } } } safe_free((void **)(void *) &recv_vec); safe_free((void **)(void *) &send_vec); /* * Allocate space (if needed) for the new element data. */ if (mesh->elem_array_len < New_Elem_Index_Size) { mesh->elem_array_len = New_Elem_Index_Size; mesh->elements = (ELEM_INFO_PTR) realloc (mesh->elements, mesh->elem_array_len * sizeof(ELEM_INFO)); if (mesh->elements == NULL) { Gen_Error(0, "fatal: insufficient memory"); return; } /* initialize the new spots */ for (i = mesh->num_elems; i < mesh->elem_array_len; i++) initialize_element(&(mesh->elements[i])); } }
/* Build the driver's MESH_INFO structure for a synthetic "random-graph"
 * input: one element block, one element per local vertex, optional
 * coordinates, adjacency copied from the CSR arrays (start/adj), then
 * build the initial element communication maps.
 *
 * Returns 1 on success, 0 on failure (message reported via Gen_Error).
 * NOTE(review): prob, pio_info, gnvtxs and ewgt_dim are currently unused
 * here -- presumably kept for signature parity with the other readers.
 */
static int setup_mesh_struct(
  int        Proc,
  int        Num_Proc,
  PROB_INFO_PTR prob,            /* problem description */
  MESH_INFO_PTR mesh,            /* mesh information for the problem */
  PARIO_INFO_PTR pio_info,       /* element distribution info*/
  ZOLTAN_ID_TYPE  gnvtxs,        /* global number of vertices across all procs*/
  int        nvtxs,              /* number of vertices in local graph */
  int       *start,              /* start of edge list for each vertex */
  ZOLTAN_ID_TYPE *adj,           /* edge list data */
  int        vwgt_dim,           /* # of weights per vertex */
  float     *vwgts,              /* vertex weight list data */
  int        ewgt_dim,           /* # of weights per edge */
  float     *ewgts,              /* edge weight list data */
  int        ndim,               /* dimension of the geometry */
  float     *x,                  /* x-coordinates of the vertices */
  float     *y,                  /* y-coordinates of the vertices */
  float     *z                   /* z-coordinates of the vertices */
)
{
  const char *yo = "setup_mesh_struct";
  int i, j, k;
  ZOLTAN_ID_TYPE elem_id;
  ZOLTAN_ID_TYPE min_vtx;   /* global ID of this proc's first vertex; used
                               to convert global IDs to local indices */

  DEBUG_TRACE_START(Proc, yo);

  /* Initialize mesh structure (mirrors the Chaco-mesh setup). */
  mesh->data_type = ZOLTAN_GRAPH;
  mesh->vwgt_dim = vwgt_dim;
  mesh->ewgt_dim = ewgt_dim;
  mesh->num_elems = nvtxs;
  mesh->elem_array_len = mesh->num_elems + 5;  /* slack for migration */
  mesh->num_dims = ndim;
  mesh->num_el_blks = 1;

  /* One allocation backs four parallel int arrays (etypes, ids, nnodes,
   * nattrs); eb_cnts is allocated separately as ZOLTAN_ID_TYPE. */
  mesh->eb_etypes = (int *) malloc (4 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_cnts = (ZOLTAN_ID_TYPE *) malloc (mesh->num_el_blks *
                                             sizeof(ZOLTAN_ID_TYPE));
  if (!mesh->eb_cnts) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = (ZOLTAN_ID_TYPE)nvtxs;
  mesh->eb_nattrs[0] = 0;

  mesh->hindex = (int *) malloc(sizeof(int));
  mesh->hindex[0] = 0;
  mesh->eb_nnodes[0] = 1;   /* one "node" (coordinate set) per element */

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc(16* sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "random-graph");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /* write element data */
  for (i = 0; i < mesh->elem_array_len; i++)
    initialize_element(&(mesh->elements[i]));

  min_vtx = local_to_global_id_map(0, Proc);

  /* Fill in per-vertex data: global ID, weights, block, partition and
   * (if ndim > 0) a single coordinate set per element. */
  for (i = 0; i < nvtxs; i++) {
    mesh->elements[i].globalID = local_to_global_id_map(i, Proc);

    if (vwgts != NULL){
      for (j=0; j<vwgt_dim; j++) {
        mesh->elements[i].cpu_wgt[j] = vwgts[i*vwgt_dim+j];
      }
    }
    else
      mesh->elements[i].cpu_wgt[0] = 1.0;   /* default unit weight */

    mesh->elements[i].elem_blk = 0;
    mesh->elements[i].my_part = Proc;

    if (mesh->num_dims > 0) {
      /* One set of coords per element. */
      /* NOTE(review): these per-element mallocs are unchecked -- assumes
       * allocation failures are acceptable here; confirm. */
      mesh->elements[i].connect = (ZOLTAN_ID_TYPE *)
                                   malloc(sizeof(ZOLTAN_ID_TYPE));
      mesh->elements[i].connect[0] = mesh->elements[i].globalID;
      mesh->elements[i].coord = (float **) malloc(sizeof(float *));
      mesh->elements[i].coord[0] = (float *) calloc(mesh->num_dims,
                                                    sizeof(float));
      mesh->elements[i].coord[0][0] = x[i];
      mesh->elements[i].avg_coord[0] = x[i];
      if (mesh->num_dims > 1) {
        mesh->elements[i].coord[0][1] = y[i];
        mesh->elements[i].avg_coord[1] = y[i];
        if (mesh->num_dims > 2) {
          mesh->elements[i].coord[0][2] = z[i];
          mesh->elements[i].avg_coord[2] = z[i];
        }
      }
    }
  }

  /* Second pass: copy the adjacency lists out of the CSR arrays. */
  for (i = 0; i < nvtxs; i++) {
    /* now start with the adjacencies */
    if (start != NULL)
      mesh->elements[i].nadj = start[i+1] - start[i];
    else
      mesh->elements[i].nadj = 0;

    if (mesh->elements[i].nadj > 0) {
      mesh->elements[i].adj_len = mesh->elements[i].nadj;
      mesh->elements[i].adj = (ZOLTAN_ID_TYPE *)
        malloc (mesh->elements[i].nadj * sizeof(ZOLTAN_ID_TYPE));
      mesh->elements[i].adj_proc = (int *)
        malloc (mesh->elements[i].nadj * sizeof(int));
      if (!(mesh->elements[i].adj) || !(mesh->elements[i].adj_proc)) {
        Gen_Error(0, "fatal: insufficient memory");
        return 0;
      }
      if (ewgts != NULL) {
        mesh->elements[i].edge_wgt = (float *)
          malloc (mesh->elements[i].nadj * sizeof(float));
        if (!(mesh->elements[i].edge_wgt)) {
          Gen_Error(0, "fatal: insufficient memory");
          return 0;
        }
      }
      else
        mesh->elements[i].edge_wgt = NULL;

      for (j = 0; j < mesh->elements[i].nadj; j++) {
        elem_id = adj[start[i] + j];
        k = global_to_proc_owner_map(elem_id, Num_Proc, Proc);

        /*
         * if the adjacent element is on this processor
         * then find the local id for that element
         */
        if (k == Proc)
          mesh->elements[i].adj[j] = elem_id-min_vtx;
        else /* use the global id */
          mesh->elements[i].adj[j] = elem_id;

        mesh->elements[i].adj_proc[j] = k;

        if (ewgts != NULL)
          mesh->elements[i].edge_wgt[j] = ewgts[start[i] + j];
      }
    } /* End: "if (mesh->elements[i].nadj > 0)" */
  } /* End: "for (i = 0; i < mesh->num_elems; i++)" */

  if (!build_elem_comm_maps(Proc, mesh)) {
    Gen_Error(0, "Fatal: error building initial elem comm maps");
    return 0;
  }

  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
/* Read "matrixmarket plus", the format written by Zoltan_Generate_Files.
 *
 * This format is our own extension of the NIST Matrix Market file
 * format.  We wished to store vertex and edge weights, and also
 * pin, vertex weight and edge weight ownership data in the file.
 * Here are some rules from the NIST design document:
 *  1. lines are limited to 1024 characters
 *  2. blank lines may appear anywhere after the first line
 *  3. numeric data on a line is separated by one or more blanks
 *  4. real data is in floating-point decimal format, can use "e" notation
 *  5. all indices are 1-based
 *  6. character data may be upper or lower case.
 *
 * The contents of the file reflects the data returned by the
 * application in the hypergraph query functions.  In particular:
 *
 * Each process supplied some subset of pins to Zoltan.  Each owned
 * some of the vertices and supplied weights for those.  Each may have
 * supplied weights for edges.  The edges need not be the edges of
 * their pins.  More than one process may have supplied a weight for
 * the same edge.
 *
 * Collective over MPI_COMM_WORLD: Proc 0 reads the whole file and
 * broadcasts it; every process then parses its own portion.
 * Returns 1 on success, 0 on failure (same value on all processes).
 */
int read_mtxplus_file(
  int Proc,
  int Num_Proc,
  PROB_INFO_PTR prob,
  PARIO_INFO_PTR pio_info,
  MESH_INFO_PTR mesh
)
{
  /* Local declarations. */
  const char *yo = "read_mtxplus_file";
  char filename[256], cmesg[256];
  struct stat statbuf;
  int rc, fsize, i, j;
  char *filebuf=NULL;
  FILE* fp;
  int nGlobalEdges, nGlobalVtxs, vtxWDim, edgeWDim;
  int nMyPins, nMyVtx, nMyEdgeWgts;
  int *myPinI, *myPinJ, *myVtxNum, *myEWGno;
  float *myVtxWgts, *myEdgeWgts;
  int status;
  int numHEdges;
  int *edgeGno, *edgeIdx, *pinGno;

  DEBUG_TRACE_START(Proc, yo);

  /* Build the file name on EVERY process so it is valid in error
   * messages on any rank.  (Previously only Proc 0 set it, and other
   * ranks printed an uninitialized buffer on failure.)  Note: the old
   * code appended ".gz" with sprintf(filename, "%s.gz", filename),
   * which passes overlapping source/destination to sprintf -- that is
   * undefined behavior; strcat is the correct way to append. */
  sprintf(filename, "%s.mtxp", pio_info->pexo_fname);
  if (pio_info->file_comp == GZIP)
    strcat(filename, ".gz");

  /* Process 0 reads the file and broadcasts it */

  if (Proc == 0) {
    fsize = 0;

    rc = stat(filename, &statbuf);

    if (rc == 0){
      fsize = statbuf.st_size;
      fp = fopen(filename, "r");

      if (!fp){
        fsize = 0;
      }
      else{
        filebuf = (char *)malloc(fsize+1);

        if (!filebuf){
          /* allocation failure: report via fsize == 0 below */
          fsize = 0;
          fclose(fp);
        }
        else{
          rc = fread(filebuf, 1, fsize, fp);
          /* Close the stream first; the old code set fp to NULL on a
           * short read and then called fclose(fp) -- fclose(NULL) is
           * undefined behavior. */
          fclose(fp);

          if (rc != fsize){
            free(filebuf);
            filebuf = NULL;
            fsize = 0;
          }
          else{
            filebuf[fsize] = 0;  /* nul-terminate so parsing can stop */
            fsize++;             /* broadcast the terminator as well  */
          }
        }
      }
    }
  }

  /* All processes learn the buffer size; fsize == 0 signals failure. */
  MPI_Bcast(&fsize, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (fsize == 0) {
    sprintf(cmesg, "fatal: Could not open/read hypergraph file %s", filename);
    Gen_Error(0, cmesg);
    return 0;
  }

  if (Proc > 0){
    filebuf = (char *)malloc(fsize);
  }

  MPI_Bcast(filebuf, fsize, MPI_BYTE, 0, MPI_COMM_WORLD);

  /* Each process reads through the file, obtaining it's
   * pins, vertex weights and edge weights.  The file lists
   * global IDs for the vertices and edges.  These will be
   * assigned global numbers based on the order they appear
   * in the file.  The global numbers begin with zero.
   * Returns 1 on success, 0 on failure.
   */
  rc = process_mtxp_file(pio_info, filebuf, fsize, Num_Proc, Proc,
                         &nGlobalEdges, &nGlobalVtxs, &vtxWDim, &edgeWDim,
                         &nMyPins, &myPinI, &myPinJ,
                         &nMyVtx, &myVtxNum, &myVtxWgts,
                         &nMyEdgeWgts, &myEWGno, &myEdgeWgts);

  free(filebuf);

  /* rc is 1 on success; the sum equals Num_Proc only if every rank
   * parsed its portion successfully. */
  MPI_Allreduce(&rc, &status, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  if (status != Num_Proc){
    return 0;
  }

  /*
   * From the lists of pins, create edge lists.  (Unless
   * the initial pin distribution is by column, in which
   * case we will test the hypergraph query interface's
   * ability to accept pins by column rather than row.)
   */

  if (pio_info->init_dist_pins != INITIAL_COL){        /* CRS */
    rc = create_edge_lists(nMyPins, myPinI, myPinJ,
                           &numHEdges, &edgeGno, &edgeIdx, &pinGno);
    mesh->format = ZOLTAN_COMPRESSED_EDGE;
  }
  else{                                                /* CCS */
    /* actually creating vertex lists, since we switched
     * the role of I and J in the argument list. */
    rc = create_edge_lists(nMyPins, myPinJ, myPinI,
                           &numHEdges, &edgeGno, &edgeIdx, &pinGno);
    mesh->format = ZOLTAN_COMPRESSED_VERTEX;
  }

  MPI_Allreduce(&rc, &status, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  if (status != Num_Proc){
    return 0;
  }

  /* The raw pin lists are no longer needed once edge lists exist. */
  safe_free((void **)(void *)&myPinI);
  safe_free((void **)(void *)&myPinJ);

  /* Initialize mesh structure for Hypergraph. */
  mesh->data_type = HYPERGRAPH;
  mesh->num_elems = nMyVtx;
  mesh->vwgt_dim = vtxWDim;
  mesh->ewgt_dim = 0;
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->num_dims = 0;
  mesh->num_el_blks = 1;

  mesh->gnhedges = nGlobalEdges;
  mesh->nhedges = numHEdges;      /* (or num vertices if CCS) */
  mesh->hewgt_dim = edgeWDim;

  mesh->hgid = edgeGno;           /* (or vertex gno if CCS) */
  mesh->hindex = edgeIdx;         /* (or vertex index if CCS) */
  mesh->hvertex = pinGno;         /* (or gno of pin edge if CCS) */
  mesh->hvertex_proc = NULL;      /* don't know don't care */
  mesh->heNumWgts = nMyEdgeWgts;
  mesh->heWgtId = myEWGno;
  mesh->hewgts = myEdgeWgts;

  /* One allocation carved into five parallel arrays of length num_el_blks. */
  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = nGlobalVtxs;
  mesh->eb_nattrs[0] = 0;
  mesh->eb_nnodes[0] = 0;

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "hypergraph");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * Write the element structure with the vertices and weights
   */
  for (i = 0; i < mesh->elem_array_len; i++) {
    initialize_element(&(mesh->elements[i]));
    if (i < mesh->num_elems){
      mesh->elements[i].globalID = myVtxNum[i];
      mesh->elements[i].my_part = Proc;
      for (j=0; j<vtxWDim; j++){
        mesh->elements[i].cpu_wgt[j] = myVtxWgts[i*vtxWDim + j];
      }
    }
  }

  safe_free((void **)(void *) &myVtxWgts);
  safe_free((void **)(void *) &myVtxNum);

  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
/* Read from file and set up hypergraph.
 *
 * Collective over MPI_COMM_WORLD.  Proc 0 opens and reads a ".hg" or
 * ".mtx" file (and, when requested, a Chaco ".assign" file); vertices,
 * pins and weights are then distributed to all processes and stored in
 * the MESH_INFO structure.  Returns 1 on success, 0 on failure.
 */
int read_hypergraph_file(
  int Proc,
  int Num_Proc,
  PROB_INFO_PTR prob,
  PARIO_INFO_PTR pio_info,
  MESH_INFO_PTR mesh
)
{
  /* Local declarations. */
  const char *yo = "read_hypergraph_file";
  char cmesg[256];

  int i, gnvtxs, distributed_pins = 0, edge, vertex, nextEdge;
  int nvtxs = 0, gnhedges = 0, nhedges = 0, npins = 0;
  int vwgt_dim=0, hewgt_dim=0, vtx, edgeSize, global_npins;
  int *hindex = NULL, *hvertex = NULL, *hvertex_proc = NULL;
  int *hgid = NULL;
  float *hewgts = NULL, *vwgts = NULL;
  ZOLTAN_FILE* fp = NULL;
  int base = 0;   /* Smallest vertex number; usually zero or one. */
  char filename[256];

  /* Variables that allow graph-based functions to be reused. */
  /* If no chaco.graph or chaco.coords files exist, values are NULL or 0,
   * since graph is not being built. If chaco.graph and/or chaco.coords
   * exist, these arrays are filled and values stored in mesh.
   * Including these files allows for comparison of HG methods with other
   * methods, along with visualization of results and comparison of
   * LB_Eval results.
   */
  int ch_nvtxs = 0;        /* Temporary values for chaco_read_graph. */
#ifdef KDDKDD
  int ch_vwgt_dim = 0;     /* Their values are ignored, as vertex   */
#endif
  float *ch_vwgts = NULL;  /* info is provided by hypergraph file.  */
  int *ch_start = NULL, *ch_adj = NULL, ch_ewgt_dim = 0;
  short *ch_assignments = NULL;
  float *ch_ewgts = NULL;
  int ch_ndim = 0;
  float *ch_x = NULL, *ch_y = NULL, *ch_z = NULL;
  int ch_no_geom = TRUE;   /* Assume no geometry info is given; reset if
                              it is provided. */
  int file_error = 0;

/***************************** BEGIN EXECUTION ******************************/

  DEBUG_TRACE_START(Proc, yo);

  if (Proc == 0) {

    /* Open and read the hypergraph file. */
    if (pio_info->file_type == HYPERGRAPH_FILE)
      sprintf(filename, "%s.hg", pio_info->pexo_fname);
    else if (pio_info->file_type == MATRIXMARKET_FILE)
      sprintf(filename, "%s.mtx", pio_info->pexo_fname);
    else {
      sprintf(cmesg, "fatal: invalid file type %d", pio_info->file_type);
      Gen_Error(0, cmesg);
      return 0;
    }

    fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
    file_error = (fp == NULL);
  }

  /* Let every rank know whether the open on Proc 0 succeeded. */
  MPI_Bcast(&file_error, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (file_error) {
    sprintf(cmesg,
      "fatal: Could not open hypergraph file %s",pio_info->pexo_fname);
    Gen_Error(0, cmesg);
    return 0;
  }

  if (pio_info->file_type == HYPERGRAPH_FILE) {
    /* read the array in on processor 0 */
    if (Proc == 0) {
      if (HG_readfile(Proc, fp, &nvtxs, &nhedges, &npins,
                      &hindex, &hvertex, &vwgt_dim, &vwgts,
                      &hewgt_dim, &hewgts, &base) != 0){
        Gen_Error(0, "fatal: Error returned from HG_readfile");
        return 0;
      }
    }
  }
  else if (pio_info->file_type == MATRIXMARKET_FILE) {
    /*
     * pio_info->chunk_reader == 0  (the usual case)
     *   process 0 will read entire file in MM_readfile,
     *   and will distribute vertices in chaco_dist_graph and pins in
     *   dist_hyperedges later.   (distributed_pins==0)
     *
     * pio_info->chunk_reader == 1  ("initial read = chunks" in zdrive.inp)
     *   process 0 will read the file in chunks, and will send vertices
     *   and pins to other processes before reading the next chunk, all
     *   in MM_readfile.  (distributed_pins==1)
     */
    if (MM_readfile(Proc, Num_Proc, fp, pio_info,
                    &nvtxs,     /* global number of vertices */
                    &nhedges,   /* global number of hyperedges */
                    &npins,     /* local number of pins */
                    &hindex, &hvertex, &vwgt_dim, &vwgts,
                    &hewgt_dim, &hewgts, &ch_start, &ch_adj,
                    &ch_ewgt_dim, &ch_ewgts, &base, &global_npins)) {
      Gen_Error(0, "fatal: Error returned from MM_readfile");
      return 0;
    }

    if (Proc == 0) ZOLTAN_FILE_close(fp);

    /* Chunked reading already distributed the pins; remember which mode
     * we are in for the redistribution step below. */
    if ((Num_Proc > 1) && pio_info->chunk_reader && (global_npins > Num_Proc)){
      distributed_pins = 1;
    }
    else{
      distributed_pins = 0;
    }
  }

#ifdef KDDKDD
  {
    /* If CHACO graph file is available, read it. */
    sprintf(filename, "%s.graph", pio_info->pexo_fname);
    fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
    file_error =
#ifndef ZOLTAN_COMPRESS
      (fp == NULL);
#else
      fp.error;
#endif

    if (!file_error) {
      /* CHACO graph file is available. */
      /* Assuming hypergraph vertices are same as chaco vertices. */
      /* Chaco vertices and their weights are ignored in rest of function. */
      if (chaco_input_graph(fp, filename, &ch_start, &ch_adj, &ch_nvtxs,
                            &ch_vwgt_dim, &ch_vwgts, &ch_ewgt_dim,
                            &ch_ewgts) != 0) {
        Gen_Error(0, "fatal: Error returned from chaco_input_graph");
        return 0;
      }
    }
    else
      ch_nvtxs = nvtxs;

    /* If coordinate file is available, read it. */
    sprintf(filename, "%s.coords", pio_info->pexo_fname);
    fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
    file_error =
#ifndef ZOLTAN_COMPRESS
      (fp == NULL);
#else
      fp.error;
#endif
    if (!file_error) {
      /* CHACO coordinates file is available. */
      ch_no_geom = FALSE;
      /* NOTE(review): "fpkdd" is not declared anywhere in this scope, so
       * this KDDKDD-only branch cannot compile; presumably "fp" was
       * intended.  Left untouched since the block is normally compiled
       * out -- confirm before enabling KDDKDD. */
      if (chaco_input_geom(fpkdd, filename, ch_nvtxs, &ch_ndim,
                           &ch_x, &ch_y, &ch_z) != 0) {
        Gen_Error(0, "fatal: Error returned from chaco_input_geom");
        return 0;
      }
    }
  }
#else /* KDDKDD */
  ch_nvtxs = nvtxs;
#endif /* KDDKDD */

  {
    /* Read Chaco assignment file, if requested */
    if (pio_info->init_dist_type == INITIAL_FILE) {
      sprintf(filename, "%s.assign", pio_info->pexo_fname);
      fp = ZOLTAN_FILE_open(filename, "r", pio_info->file_comp);
      if (fp == NULL) {
        sprintf(cmesg, "Error: Could not open Chaco assignment file %s; "
                "initial distribution cannot be read",
                filename);
        Gen_Error(0, cmesg);
        return 0;
      }
      else {
        /* read the coordinates in on processor 0 */
        ch_assignments = (short *) malloc(nvtxs * sizeof(short));
        if (nvtxs && !ch_assignments) {
          Gen_Error(0, "fatal: memory error in read_hypergraph_file");
          return 0;
        }
        /* closes fpassign when done */
        if (chaco_input_assign(fp, filename, ch_nvtxs, ch_assignments) != 0){
          Gen_Error(0, "fatal: Error returned from chaco_input_assign");
          return 0;
        }
      }
    }
  }

  MPI_Bcast(&base, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (distributed_pins){
    /* Pins were already scattered during the chunked read.  Compact the
     * local edge list in place: count non-empty edges, then rebuild
     * hgid/hindex so only non-empty edges remain, and record the owning
     * process of each pin's vertex. */
    gnhedges = nhedges;
    nhedges = 0;
    hewgt_dim = 0;
    hewgts = NULL;
    for (edge=0; edge<gnhedges; edge++){
      edgeSize = hindex[edge+1] - hindex[edge];
      if (edgeSize > 0) nhedges++;
    }
    hgid = (int *)malloc(nhedges * sizeof(int));
    hvertex_proc = (int *)malloc(npins * sizeof(int));
    nextEdge=0;
    vtx=0;
    for (edge=0; edge<gnhedges; edge++){
      edgeSize = hindex[edge+1] - hindex[edge];
      if (edgeSize > 0){
        hgid[nextEdge] = edge+1;    /* global edge IDs are 1-based here */
        if (nextEdge < edge){
          /* shift hindex left over the empty edges already skipped */
          hindex[nextEdge+1] = hindex[nextEdge] + edgeSize;
        }
        for (vertex=0; vertex<edgeSize; vertex++,vtx++){
          hvertex_proc[vtx] = ch_dist_proc(hvertex[vtx], NULL, 1);
        }
        nextEdge++;
      }
    }
    gnvtxs = nvtxs;
    nvtxs = ch_dist_num_vtx(Proc, NULL);
    if (ch_start){    /* need to include only vertices this process owns */
      for (i=0,vertex=0; i<gnvtxs; i++){
        if ((ch_start[i+1] > ch_start[vertex]) || /* vtx has adjacencies so it's mine */
            (ch_dist_proc(i, NULL, 0) == Proc))   /* my vtx with no adjacencies */
        {
          if (i > vertex){
            ch_start[vertex+1] = ch_start[i+1];
          }
          vertex++;
        }
      }
    }
#if 0
    debug_lists(Proc, Num_Proc, nhedges, hindex, hvertex, hvertex_proc, hgid);
#endif
  } else{

    /* Distribute hypergraph graph */
    /* Use hypergraph vertex information and chaco edge information. */

    /* Note: (!f(...)) != 0 is equivalent to !f(...) -- the "!" binds
     * before "!=", so this fails when chaco_dist_graph returns 0. */
    if (!chaco_dist_graph(MPI_COMM_WORLD, pio_info, 0, &gnvtxs, &nvtxs,
           &ch_start, &ch_adj, &vwgt_dim, &vwgts, &ch_ewgt_dim, &ch_ewgts,
           &ch_ndim, &ch_x, &ch_y, &ch_z, &ch_assignments) != 0) {
      Gen_Error(0, "fatal: Error returned from chaco_dist_graph");
      return 0;
    }

    if (!dist_hyperedges(MPI_COMM_WORLD, pio_info, 0, base, gnvtxs, &gnhedges,
           &nhedges, &hgid, &hindex, &hvertex, &hvertex_proc, &hewgt_dim,
           &hewgts, ch_assignments)) {
      Gen_Error(0, "fatal: Error returned from dist_hyperedges");
      return 0;
    }
  }

  /* Initialize mesh structure for Hypergraph. */
  mesh->data_type = HYPERGRAPH;
  mesh->num_elems = nvtxs;
  mesh->vwgt_dim = vwgt_dim;
  mesh->ewgt_dim = ch_ewgt_dim;
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->num_dims = ch_ndim;
  mesh->num_el_blks = 1;

  mesh->gnhedges = gnhedges;
  mesh->nhedges = nhedges;
  mesh->hewgt_dim = hewgt_dim;

  mesh->hgid = hgid;
  mesh->hindex = hindex;
  mesh->hvertex = hvertex;
  mesh->hvertex_proc = hvertex_proc;
  mesh->heNumWgts = nhedges;
  mesh->heWgtId = NULL;
  mesh->hewgts = hewgts;

  /* One allocation carved into five parallel arrays of length num_el_blks. */
  mesh->eb_etypes = (int *) malloc (5 * mesh->num_el_blks * sizeof(int));
  if (!mesh->eb_etypes) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  mesh->eb_ids = mesh->eb_etypes + mesh->num_el_blks;
  mesh->eb_cnts = mesh->eb_ids + mesh->num_el_blks;
  mesh->eb_nnodes = mesh->eb_cnts + mesh->num_el_blks;
  mesh->eb_nattrs = mesh->eb_nnodes + mesh->num_el_blks;

  mesh->eb_names = (char **) malloc (mesh->num_el_blks * sizeof(char *));
  if (!mesh->eb_names) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  mesh->eb_etypes[0] = -1;
  mesh->eb_ids[0] = 1;
  mesh->eb_cnts[0] = nvtxs;
  mesh->eb_nattrs[0] = 0;
  /*
   * Each element has one set of coordinates (i.e., node) if a coords file
   * was provided; zero otherwise.
   */
  MPI_Bcast( &ch_no_geom, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (ch_no_geom)
    mesh->eb_nnodes[0] = 0;
  else
    mesh->eb_nnodes[0] = 1;

  /* allocate space for name */
  mesh->eb_names[0] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
  if (!mesh->eb_names[0]) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  strcpy(mesh->eb_names[0], "hypergraph");

  /* allocate the element structure array */
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * initialize all of the element structs as unused by
   * setting the globalID to -1
   */
  for (i = 0; i < mesh->elem_array_len; i++)
    initialize_element(&(mesh->elements[i]));

  /*
   * now fill the element structure array with the
   * information from the Chaco file
   * Use hypergraph vertex information and chaco edge information.
   */
  if (!chaco_fill_elements(Proc, Num_Proc, prob, mesh, gnvtxs, nvtxs,
                     ch_start, ch_adj, vwgt_dim, vwgts, ch_ewgt_dim, ch_ewgts,
                     ch_ndim, ch_x, ch_y, ch_z, ch_assignments, base)) {
    Gen_Error(0, "fatal: Error returned from chaco_fill_elements");
    return 0;
  }
#if 0
  debug_elements(Proc, Num_Proc, mesh->num_elems,mesh->elements);
#endif

  /* Temporary Chaco arrays were copied into mesh; release them. */
  safe_free((void **)(void *) &vwgts);
  safe_free((void **)(void *) &ch_ewgts);
  safe_free((void **)(void *) &ch_vwgts);
  safe_free((void **)(void *) &ch_x);
  safe_free((void **)(void *) &ch_y);
  safe_free((void **)(void *) &ch_z);
  safe_free((void **)(void *) &ch_start);
  safe_free((void **)(void *) &ch_adj);
  safe_free((void **)(void *) &ch_assignments);

  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;
}
// Explicit-dynamics time-stepping driver.
//
// Builds the element/node fields for the given mesh, applies an initial
// X-velocity pulse on the x == 0 boundary nodes (one value per UQ sample),
// then runs 'steps' explicit time steps: exchange ghost-node state,
// compute gradients / rotations / internal forces, and update nodal state.
// Timing of each phase is accumulated into the returned PerformanceData.
//
// @param mesh          distributed finite-element mesh fixture
// @param global_max_x  global mesh extent in X (used for the x boundary condition)
// @param global_max_y  global mesh extent in Y (used only for sample printing)
// @param global_max_z  global mesh extent in Z (used only for sample printing)
// @param uq_count      number of uncertainty-quantification samples per node
// @param steps         number of time steps to run
// @param print_sample  0 = silent, 1 = print axis-line displacements,
//                      2 = print the center-node displacement
// @return per-phase timing data (max over all ranks of 'machine')
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
                     const int global_max_x ,
                     const int global_max_y ,
                     const int global_max_z ,
                     const unsigned uq_count ,
                     const int steps ,
                     const int print_sample )
{
  typedef Scalar scalar_type ;
  typedef FixtureType fixture_type ;
  typedef typename fixture_type::device_type device_type ;

  enum { ElementNodeCount = fixture_type::element_node_count };

  const int total_num_steps = steps ;

  const Scalar user_dt = 5.0e-6;
  //const Scalar  end_time = 0.0050;

  // element block parameters
  const Scalar  lin_bulk_visc = 0.0;
  const Scalar  quad_bulk_visc = 0.0;

  // const Scalar  lin_bulk_visc = 0.06;
  // const Scalar  quad_bulk_visc = 1.2;
  // const Scalar  hg_stiffness = 0.0;
  // const Scalar  hg_viscosity = 0.0;
  // const Scalar  hg_stiffness = 0.03;
  // const Scalar  hg_viscosity = 0.001;

  // material properties
  const Scalar youngs_modulus=1.0e6;
  const Scalar poissons_ratio=0.0;
  const Scalar density = 8.0e-4;

  const comm::Machine machine = mesh.parallel_data_map.machine ;

  PerformanceData perf_data ;

  Kokkos::Impl::Timer wall_clock ;

  //------------------------------------
  // Generate fields

  typedef Fields< scalar_type , device_type > fields_type ;

  fields_type mesh_fields( mesh , uq_count ,
                           lin_bulk_visc ,
                           quad_bulk_visc ,
                           youngs_modulus ,
                           poissons_ratio ,
                           density );

  // Host mirrors for initializing velocity and for sample printing.
  typename fields_type::node_coords_type::HostMirror
    model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );

  typename fields_type::spatial_precise_view::HostMirror
    displacement_h = Kokkos::create_mirror( mesh_fields.displacement );

  typename fields_type::spatial_precise_view::HostMirror
    velocity_h = Kokkos::create_mirror( mesh_fields.velocity );

  Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );

  //------------------------------------
  // Initialization

  initialize_element( mesh_fields );
  initialize_node( mesh_fields );

  const Scalar x_bc = global_max_x ;

  // Initial condition on velocity to initiate a pulse along the X axis:
  // each UQ sample gets a distinct pulse magnitude.
  // (The original code assigned this value twice in a row; the redundant
  // duplicate statement has been removed -- identical value, no effect.)
  {
    const unsigned X = 0;
    for ( unsigned inode = 0; inode < mesh_fields.num_nodes; ++inode) {
      for ( unsigned kq = 0 ; kq < uq_count ; ++kq ) {
        if ( model_coords_h(inode,X) == 0 ) {
          velocity_h(inode,kq,X) = 1000 + 100 * kq ;
        }
      }
    }
  }

  Kokkos::deep_copy( mesh_fields.velocity , velocity_h );
  Kokkos::deep_copy( mesh_fields.velocity_new , velocity_h );

  //--------------------------------------------------------------------------
  // We will call a sequence of functions.  These functions have been
  // grouped into several functors to balance the number of global memory
  // accesses versus requiring too many registers or too much L1 cache.
  // Global memory accees have read/write cost and memory subsystem contention cost.
  //--------------------------------------------------------------------------

  perf_data.init_time = comm::max( machine , wall_clock.seconds() );

  // Parameters required for the internal force computations.

  perf_data.number_of_steps = total_num_steps ;

  // Ghost exchange carries { dx, dy, dz, vx, vy, vz } per node per sample.
  typedef typename fields_type::spatial_precise_view::scalar_type comm_value_type ;
  const unsigned comm_value_count = 6 ;

  Kokkos::AsyncExchange< comm_value_type , device_type ,
                         Kokkos::ParallelDataMap >
    comm_exchange( mesh.parallel_data_map , comm_value_count * uq_count );

  for ( int step = 0; step < total_num_steps; ++step ) {

    //------------------------------------------------------------------------
    // rotate the state variable views.

    swap( mesh_fields.dt , mesh_fields.dt_new );
    swap( mesh_fields.displacement , mesh_fields.displacement_new );
    swap( mesh_fields.velocity , mesh_fields.velocity_new );
    swap( mesh_fields.rotation , mesh_fields.rotation_new );

    //------------------------------------------------------------------------
    // Communicate "send" nodes' displacement and velocity next_state
    // to the ghosted nodes.
    // buffer packages: { { dx , dy , dz , vx , vy , vz }_node }

    wall_clock.reset();

    pack_state( mesh_fields , comm_exchange.buffer(),
                mesh.parallel_data_map.count_interior ,
                mesh.parallel_data_map.count_send );

    comm_exchange.setup();

    comm_exchange.send_receive();

    unpack_state( mesh_fields , comm_exchange.buffer() ,
                  mesh.parallel_data_map.count_owned ,
                  mesh.parallel_data_map.count_receive );

    device_type::fence();

    perf_data.comm_time += comm::max( machine , wall_clock.seconds() );

    //------------------------------------------------------------------------

    wall_clock.reset();

    // First kernel 'grad_hgop' combines two functions:
    // gradient, velocity gradient
    gradient( mesh_fields );

    // Combine tensor decomposition and rotation functions.
    decomp_rotate( mesh_fields );

    internal_force( mesh_fields , user_dt );

    device_type::fence();

    perf_data.internal_force_time +=
      comm::max( machine , wall_clock.seconds() );

    //------------------------------------------------------------------------
    // Assembly of elements' contributions to nodal force into
    // a nodal force vector.  Update the accelerations, velocities,
    // displacements.
    // The same pattern can be used for matrix-free residual computations.

    wall_clock.reset();

    nodal_update( mesh_fields , x_bc );

    device_type::fence();

    perf_data.central_diff +=
      comm::max( machine , wall_clock.seconds() );

    // Periodic diagnostic output of displacements every 100 steps.
    if ( print_sample && 0 == step % 100 ) {
      Kokkos::deep_copy( displacement_h , mesh_fields.displacement_new );
      Kokkos::deep_copy( velocity_h ,     mesh_fields.velocity_new );

      if ( 1 == print_sample ) {

        // Displacements along the (y==0, z==0) edge and the opposite
        // (y==max, z==max) edge, for every UQ sample.
        for ( unsigned kp = 0 ; kp < uq_count ; ++kp ) {

          std::cout << "step " << step
                    << " : displacement({*,0,0}," << kp << ",0) =" ;
          for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
            if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
              std::cout << " " << displacement_h(i,kp,0);
            }
          }
          std::cout << std::endl ;

          const float tol = 1.0e-6 ;
          const int yb = global_max_y ;
          const int zb = global_max_z ;
          std::cout << "step " << step
                    << " : displacement({*," << yb << "," << zb << "},"
                    << kp << ",0) =" ;
          for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
            if ( fabs( model_coords_h(i,1) - yb ) < tol &&
                 fabs( model_coords_h(i,2) - zb ) < tol ) {
              std::cout << " " << displacement_h(i,kp,0);
            }
          }
          std::cout << std::endl ;
        }
      }
      else if ( 2 == print_sample ) {

        // Displacement vector of the node at the mesh center, sample 0.
        const unsigned kp = 0 ;
        const float tol = 1.0e-6 ;
        const int xb = global_max_x / 2 ;
        const int yb = global_max_y / 2 ;
        const int zb = global_max_z / 2 ;

        for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
          if ( fabs( model_coords_h(i,0) - xb ) < tol &&
               fabs( model_coords_h(i,1) - yb ) < tol &&
               fabs( model_coords_h(i,2) - zb ) < tol ) {
            std::cout << "step " << step
                      << " : displacement("
                      << xb << "," << yb << "," << zb << ") = {"
                      << std::setprecision(6)
                      << " " << displacement_h(i,kp,0)
                      << std::setprecision(2)
                      << " " << displacement_h(i,kp,1)
                      << std::setprecision(2)
                      << " " << displacement_h(i,kp,2)
                      << " }" << std::endl ;
          }
        }
      }
    }
  }

  return perf_data ;
}