int merge_ordered_sets (const int **labels,const float **vals, const int *sizes,int k, int **labels_out,float **vals_out) { int i,j; int n_out = ivec_sum (sizes, k); int *all_labels = ivec_new (n_out); float *all_vals = fvec_new (n_out); /* Maxheap: * * maxheap label = index of table in 0..k-1 * * maxheap val = - (label from labels table) * * If maxheap val does not fit in a float (if label>2**24), it * triggers an assertion. Time to implement a maxheap with int * values... */ fbinheap_t *mh = fbinheap_new(k); /* current index on table k */ int indices[k]; for ( i = 0 ; i < k ; i++) { if (sizes[i] == 0) continue; indices[i] = 0; int label = labels[i][0]; float mh_val = -label; assert ((int)(-mh_val) == label || !"lost precision in int->float conversion"); fbinheap_add (mh, i, mh_val); } int all_i = 0; while (mh->k>0) { /* smallest available label */ i = mh->label[1]; /* index of table */ j = (int)(-mh->val[1]); /* label */ /* I don't dare compiling with -DNDEBUG */ /* assert(j==labels[i][indices[i]]); */ all_labels[all_i] = j; all_vals[all_i] = vals[i][indices[i]]; all_i++; /* remove handled label */ fbinheap_pop (mh); indices[i]++; if (indices[i] < sizes[i]) { /* push next label from this table */ int label = labels[i][indices[i]]; float mh_val = -label; assert ((int)(-mh_val) == label || !"lost precision in int->float conversion"); fbinheap_add (mh, i, mh_val); } } fbinheap_delete (mh); assert (all_i == n_out); *labels_out = all_labels; *vals_out = all_vals; return n_out; }
/*
 * set_pairwise - build the pairwise-exchange bookkeeping stored in *gs.
 *
 * Reads gs->nel, gs->elms, gs->ngh_buf, gs->pw_nghs and gs->len_pw_list,
 * plus the file-scope values num_nodes, my_id and vec_sz, and fills in:
 *   gs->pw_elm_list     indices of elements whose TOP_BIT was set (-1 terminated)
 *   gs->num_pairs       number of bits set in gs->pw_nghs
 *   gs->pair_list       list expanded from the gs->pw_nghs bit mask
 *   gs->msg_sizes       per-neighbor count of pairwise elements
 *   gs->node_list       per-neighbor index lists (-1 terminated, NULL at end)
 *   gs->msg_ids_out/in  request arrays, last slot set to MPI_REQUEST_NULL
 *   gs->pw_vals, gs->out, gs->in   value buffers
 *   gs->loc/max/min/avg_node_pairs, gs->max_pairs, gs->msg_total  statistics
 *
 * Side effect: TOP_BIT is cleared in every gs->elms[] entry that had it set.
 *
 * Returns 0 on success, or the error code propagated through CHKERRQ.
 *
 * NOTE(review): malloc() results are not checked, and p_mask/tmp_proc_mask
 * are not released when CHKERRQ returns early.
 */
static PetscErrorCode set_pairwise(gs_id *gs)
{
  PetscInt       i, j;
  PetscInt       p_mask_size;
  PetscInt       *p_mask, *sh_proc_mask, *tmp_proc_mask;
  PetscInt       *ngh_buf, *buf2;
  PetscInt       offset;
  PetscInt       *msg_list, *msg_size, **msg_nodes, nprs;
  PetscInt       *pairwise_elm_list, len_pair_list=0;
  PetscInt       *iptr, t1, i_start, nel, *elms;
  PetscInt       ct;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* to make life easier */
  nel          = gs->nel;
  elms         = gs->elms;
  ngh_buf      = gs->ngh_buf;
  sh_proc_mask = gs->pw_nghs;

  /* need a few temp masks; here p_mask_size is a byte count */
  p_mask_size   = len_bit_mask(num_nodes);
  p_mask        = (PetscInt*) malloc(p_mask_size);
  tmp_proc_mask = (PetscInt*) malloc(p_mask_size);

  /* set mask to my my_id's bit mask */
  ierr = set_bit_mask(p_mask,p_mask_size,my_id);CHKERRQ(ierr);

  /* from here on p_mask_size counts PetscInt words, not bytes */
  p_mask_size /= sizeof(PetscInt);

  /* one extra slot for the -1 terminator */
  len_pair_list   = gs->len_pw_list;
  gs->pw_elm_list = pairwise_elm_list = (PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt));

  /* how many processors (nghs) do we have to exchange with? */
  nprs = gs->num_pairs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));

  /* allocate space for gs_gop() info */
  gs->pair_list = msg_list  = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
  gs->msg_sizes = msg_size  = (PetscInt *)  malloc(sizeof(PetscInt)*nprs);
  gs->node_list = msg_nodes = (PetscInt **) malloc(sizeof(PetscInt*)*(nprs+1));

  /* init msg_size list */
  ierr = ivec_zero(msg_size,nprs);CHKERRQ(ierr);

  /* expand from bit mask list to int list */
  ierr = bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr);

  /* keep list of elements being handled pairwise; clear their TOP_BIT tag */
  for (i=j=0;i<nel;i++) {
    if (elms[i] & TOP_BIT) {
      elms[i] ^= TOP_BIT;
      pairwise_elm_list[j++] = i;
    }
  }
  pairwise_elm_list[j] = -1;

  /* request arrays carry one extra, null-request slot at the end */
  gs->msg_ids_out       = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
  gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
  gs->msg_ids_in        = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
  gs->msg_ids_in[nprs]  = MPI_REQUEST_NULL;
  gs->pw_vals           = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);

  /* find who goes to each processor; i_start tracks the largest per-neighbor count */
  for (i_start=i=0;i<nprs;i++) {
    /* processor i's mask */
    ierr = set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr);

    /* det # going to processor i */
    for (ct=j=0;j<len_pair_list;j++) {
      buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
      ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
      if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) {ct++;}
    }
    msg_size[i] = ct;
    i_start     = PetscMax(i_start,ct);

    /* space to hold nodes in message to neighbor i, plus -1 terminator */
    msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1));

    /* second pass with the same test: record the pairwise-list positions */
    for (j=0;j<len_pair_list;j++) {
      buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
      ierr = ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr);
      if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) {*iptr++ = j;}
    }
    *iptr = -1;
  }
  msg_nodes[nprs] = NULL;

  /* Statistics via giop() with GL_MAX / GL_MIN / GL_ADD; presumably a
     reduction over all ranks, with offset as scratch space -- confirm
     against giop(). */
  j = gs->loc_node_pairs = i_start;
  t1   = GL_MAX;
  ierr = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
  gs->max_node_pairs = i_start;

  i_start = j;
  t1      = GL_MIN;
  ierr    = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
  gs->min_node_pairs = i_start;

  i_start = j;
  t1      = GL_ADD;
  ierr    = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
  gs->avg_node_pairs = i_start/num_nodes + 1;

  /* this call's error code used to be dropped; check it like the others */
  i_start = nprs;
  t1      = GL_MAX;
  ierr    = giop(&i_start,&offset,1,&t1);CHKERRQ(ierr);
  gs->max_pairs = i_start;

  /* remap pairwise in tail of gsi_via_bit_mask() */
  gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
  gs->out       = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
  gs->in        = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);

  /* release the temporary masks */
  free((void*)p_mask);
  free((void*)tmp_proc_mask);

  PetscFunctionReturn(0);
}