/* Apply the permutation 'perm' to the elements of 's' in place by following
 * permutation cycles, using 'sx' as a one-element scratch buffer.
 * 'offset' shifts the index space: entries of perm are compared against
 * position + offset, so perm values are absolute indices starting at offset.
 * Each cycle is resolved by saving its first element to sx, pulling the rest
 * of the cycle through two ping-pong slots (e0/e1), and rewriting each perm
 * entry to its identity value as it is resolved, so the whole pass is O(n).
 * NOTE(review): the "backward" direction is inferred from the name only —
 * confirm against the matching sort_permute_forward_ implementation.
 * Returns 0. */
inline slint sort_permute_backward_(elements_t *s, elements_t *sx, slint *perm, slint offset) /* sl_func sort_permute_backward_ */
{
  elements_t src, end, e0, e1, *from, *to, *t;
  slint i, j, k, *ia, *ja;

  elem_assign(s, &src);              /* cursor over the elements of s */
  elem_assign_at(s, s->size, &end);  /* one-past-the-end sentinel */

  from = &e0;
  to = &e1;

  i = offset;
  ia = perm;

  while (src.keys != end.keys)
  {
    /* *ia == i means this position is a fixed point / its cycle is done */
    if (*ia != i)
    {
      elem_copy(&src, sx);           /* save the cycle's start element */
      elem_assign(&src, to);

      ja = ia;
      j = i;

      /* walk the cycle until it comes back around to index i */
      while (i != (k = *ja))
      {
        elem_assign_at(s, k - offset, from);  /* element that belongs here */
        elem_copy(from, to);

        /* swap the ping-pong slots */
        t = to;
        to = from;
        from = t;

        *ja = j;                     /* mark this perm entry as resolved */
        ja = perm + (j = k) - offset;/* advance along the cycle */
      }

      elem_copy(sx, to);             /* drop the saved element into place */
      *ja = j;
    }

    ia++;
    i++;
    elem_inc(&src);
  }

  return 0;
}
/* Advance the "fore" iterator (over set a), cache copies of the freshly
 * produced fore elements in iter->prod.elem[], then advance the "back"
 * iterator (over set b).
 * Returns TRUE only when both iterators delivered a next tuple part. */
static Bool get_both_parts(
   const Set* a,
   const Set* b,
   SetIter*   iter,
   SetIter*   iter_a,
   SetIter*   iter_b,
   Tuple*     tuple,
   int        offset,
   int        offset2)
{
   int idx;

   if (!set_iter_next_intern(iter_a, a, tuple, offset))
      return FALSE;

   /* Remember the fore part so later steps can replay it into the tuple. */
   for(idx = 0; idx < a->head.dim; idx++)
   {
      assert(iter->prod.elem[idx] == NULL);

      iter->prod.elem[idx] = elem_copy(tuple_get_elem(tuple, idx + offset));

      assert(elem_is_valid(iter->prod.elem[idx]));
   }
   return set_iter_next_intern(iter_b, b, tuple, offset2) ? TRUE : FALSE;
}
/* Deep-copy an array of 'src_count' opaque elements into a freshly allocated
 * array, returned through *dst with its length in *dst_count.
 *
 * elem_copy   - required; clones one element (NULL result = failure).
 * elem_delete - optional; used to roll back already-cloned elements when a
 *               later clone fails.
 *
 * On success returns true; *dst owns the array and its elements (for an
 * empty list *dst is set to NULL and *dst_count to 0).
 * On failure returns false and leaves *dst / *dst_count untouched.
 *
 * Fixes vs. previous version:
 *  - src_count == 0 no longer depends on implementation-defined malloc(0)
 *    (which may return NULL and caused a spurious failure); src may also be
 *    NULL for an empty list.
 *  - guards against overflow of src_count * sizeof(void*) on 32-bit hosts. */
bool ofc_parse_list_copy(
	unsigned* dst_count, void*** dst,
	unsigned src_count, const void** src,
	void* (*elem_copy)(const void*),
	void (*elem_delete)(void*))
{
	if (!elem_copy || !dst || !dst_count)
		return false;

	/* src may only be NULL when the list is empty. */
	if (!src && (src_count > 0))
		return false;

	/* Empty list: succeed without allocating. */
	if (src_count == 0)
	{
		*dst = NULL;
		*dst_count = 0;
		return true;
	}

	/* Overflow guard for the allocation size (relevant on 32-bit). */
	if (src_count > ((size_t)-1) / sizeof(void*))
		return false;

	void** copy = (void**)malloc(
		src_count * sizeof(void*));
	if (!copy) return false;

	unsigned i;
	for (i = 0; i < src_count; i++)
	{
		copy[i] = elem_copy(src[i]);
		if (!copy[i])
		{
			/* Roll back: delete the elements cloned so far, then the array. */
			if (elem_delete)
			{
				unsigned j;
				for (j = 0; j < i; j++)
					elem_delete(copy[j]);
			}
			free(copy);
			return false;
		}
	}

	*dst = copy;
	*dst_count = src_count;
	return true;
}
/* Create a new LIST_ELEM list whose single entry is a copy of 'elem'. */
List* list_new_elem(const Elem* elem)
{
   assert(elem_is_valid(elem));

   ListData data;
   data.elem = elem_copy(elem);

   return list_new(LIST_ELEM, &data);
}
/* Insert a copy of 'elem' into 'list', which must be a list of elements. */
void list_insert_elem(List* list, const Elem* elem)
{
   assert(list_is_valid(list));
   assert(elem_is_valid(elem));
   assert(list->type == LIST_ELEM);

   ListData data;
   data.elem = elem_copy(elem);

   list_insert_data(list, &data);
}
/*ARGSUSED*/
/* Advance an iterator over the product set a x b. The back iterator (b)
 * runs fastest; whenever it is exhausted it is rewound, the cached fore
 * elements are dropped, and a new fore part (a) is fetched. While only b
 * advances, the cached fore part is replayed into 'tuple'. */
static Bool set_prod_iter_next(
   SetIter*   iter,
   const Set* set,
   Tuple*     tuple,
   int        offset)
{
   int k;

   assert(set_prod_iter_is_valid(iter));
   assert(set_prod_is_valid(set));
   assert(tuple_is_valid(tuple));
   assert(offset >= 0);
   assert(offset + set->head.dim <= tuple_get_dim(tuple));

   Set*     a       = set->prod.set_a;
   Set*     b       = set->prod.set_b;
   SetIter* iter_a  = iter->prod.iter_a;
   SetIter* iter_b  = iter->prod.iter_b;
   int      offset2 = offset + a->head.dim;

   /* Very first call: fetch both a fore part and a back part. */
   if (iter->prod.first)
   {
      iter->prod.first = FALSE;

      return get_both_parts(a, b, iter, iter_a, iter_b, tuple, offset, offset2);
   }

   /* Try to advance only the back part. */
   if (set_iter_next_intern(iter_b, b, tuple, offset2))
   {
      /* Replay the cached fore part into the tuple. */
      for(k = 0; k < a->head.dim; k++)
         tuple_set_elem(tuple, k + offset, elem_copy(iter->prod.elem[k]));

      return TRUE;
   }

   /* Back part exhausted: rewind it and drop the cached fore elements. */
   set_iter_reset_intern(iter_b, b);

   for(k = 0; k < set->head.dim; k++)
   {
      if (iter->prod.elem[k] != NULL)
      {
         elem_free(iter->prod.elem[k]);
         iter->prod.elem[k] = NULL;
      }
   }
   return get_both_parts(a, b, iter, iter_a, iter_b, tuple, offset, offset2);
}
/* Dummy (local-only) variant of the split-by-key step: classifies every
 * element of 's' with the key-to-class function 'k2c' and distributes the
 * elements either in place (own rank) or into per-target send buffers carved
 * out of 'sx'. No MPI communication is performed here; 'comm' is unused —
 * presumably kept so the signature matches the real mpi_splitk.
 * Per-target classification counts are accumulated in 'send_stats' (an
 * internal array is used when the caller passes NULL).
 * Returns 0 on success, -1 on NULL arguments, -2 if 'sx' is too small to
 * hold at least one element per foreign process. */
slint mpi_splitk_dummy(elements_t *s, k2c_func k2c, void *ci, elements_t *sx, slint *send_stats, int size, int rank, MPI_Comm comm) /* sl_proto, sl_func mpi_splitk_dummy */
{
  slint i, j, k, t;
  slint local_sb_counts[size];     /* fill level of each send buffer */
  slint _send_stats[size];         /* fallback when send_stats == NULL */
  elements_t sb[size], sb_current[size];
  elements_t src, dst, end;

  if (s == NULL || ci == NULL || sx == NULL) return -1;

  /* need send_buffers with at least one element per foreign process */
  if (sx->size < size - 1) return -2;

  rti_tstart(rti_tid_mpi_splitk_dummy);
  rti_tstart(rti_tid_mpi_splitk_dummy_init);

  if (send_stats == NULL) send_stats = _send_stats;

  /* initials */
  j = sx->size;                    /* remaining buffer space */
  k = size - 1;                    /* remaining foreign processes */
  for (i = 0; i < size; ++i)
  {
    /* init the local send_buffer counters */
    local_sb_counts[i] = 0;

    /* prepare the send_buffers: divide the remaining space of sx evenly
       (rounded up) among the remaining foreign processes */
    if (i != rank)
    {
      elem_assign_at(sx, sx->size - j, &sb[i]);
      sb[i].size = (j / k) + (j % k != 0);
      j -= sb[i].size;
      --k;
    } else elem_null(&sb[i]);      /* no buffer for the local rank */

    elem_assign(&sb[i], &sb_current[i]);

    send_stats[i] = 0;
  }

  elem_assign(s, &src);            /* read cursor */
  elem_assign(s, &dst);            /* write cursor for locally-kept elements */
  elem_assign_at(s, s->size, &end);

  rti_tstop(rti_tid_mpi_splitk_dummy_init);
  rti_tstart(rti_tid_mpi_splitk_dummy_loop);

  while (1)
  {
    /* distribute the elements to the send_buffer, as long as possible
       (elements left and target send_buffer not full) */
    while (src.keys != end.keys)
    {
      /* compute the target-process of the current element */
      t = (k2c)(src.keys, src.keys - s->keys, ci);

      ++send_stats[t];

#ifndef K2C_ONLY
      /* is the local process the target? */
      if (t == rank)
      {
        /* if necessary, move the element on the local process */
        if (src.keys != dst.keys) elem_copy(&src, &dst);

        /* update the dst-position */
        elem_inc(&dst);
      } else /* the target is another process (need to send the element) */
      {
        /* break, if the according send_buffer is full */
        if (local_sb_counts[t] >= sb[t].size) break;

        /* copy the element to the according send_buffer */
        elem_copy(&src, &sb_current[t]);
        elem_inc(&sb_current[t]);
        ++local_sb_counts[t];

        /* buffer full: rewind its cursor and reset the counter
           (NOTE(review): this overwrites the buffered elements on the next
           pass — in this dummy variant nothing is ever sent, which looks
           intentional for a classification-only benchmark; confirm) */
        if (local_sb_counts[t] >= sb[t].size)
        {
          elem_sub(&sb_current[t], local_sb_counts[t]);
          local_sb_counts[t] = 0;
        }
      }
#endif

      /* update the src-position */
      elem_inc(&src);
    }

    break;  /* single pass only: no communication step follows in the dummy */
  }

  rti_tstop(rti_tid_mpi_splitk_dummy_loop);
  rti_tstop(rti_tid_mpi_splitk_dummy);

  return 0;
}
/* Recursive MSB radix sort pass, double-buffered: classifies the elements of
 * 's' by the key bits [rhigh - current_width + 1 .. rhigh], scatters them
 * class-by-class into 'sx', then recurses on each class with s/sx swapped
 * ('switchdb' tracks the swap parity so the final result ends up in 's').
 * rhigh/rlow delimit the remaining key bit range; rwidth is the maximum
 * number of bits consumed per pass.
 * With SR_MA_INSERTSORT, small classes fall back to a bitmask insertion sort
 * instead of recursing. Returns 0. */
slint_t rs_rec_ma_db(elements_t *s, elements_t *sx, slint_t rhigh, slint_t rlow, slint_t rwidth, slint_t switchdb) /* sl_func rs_rec_ma_db */
{
#define max_nclasses (powof2_typed(sort_radix_width_max, slkey_pure_t))

  slkey_pure_t bit_mask, nclasses;
  slint_t i, j, current_width, c[max_nclasses];
  elements_t xi, xj, end, parts[max_nclasses];

  elem_assign_at(s, s->size, &end);

  /* this pass consumes at most rwidth bits, clipped to the remaining range */
  current_width = xmin(rwidth, rhigh - rlow + 1);
  rhigh -= current_width - 1;

  nclasses = powof2_typed(current_width, slkey_pure_t);
  bit_mask = nclasses - 1;

  /* zero all counter */
  for (i = 0; i < nclasses; i++) c[i] = 0;

  /* count the number of elements in every class */
  for (elem_assign(s, &xi); xi.keys < end.keys; elem_inc(&xi))
    ++c[key_radix_key2class(key_purify(*xi.keys), rhigh, bit_mask)];

  /* compute the target of every class (prefix sums as positions in sx) */
  elem_assign(sx, &parts[0]);
  for (i = 1; i < nclasses; i++) elem_assign_at(&parts[i - 1], c[i - 1], &parts[i]);

  /* split the elements: stable scatter from s into sx */
  elem_assign(s, &xi);
  elem_assign_at(s, s->size, &end);
  while (xi.keys < end.keys)
  {
    j = key_radix_key2class(key_purify(*xi.keys), rhigh, bit_mask);
    elem_copy(&xi, &parts[j]);
    elem_inc(&xi);
    elem_inc(&parts[j]);
  }

  --rhigh;

  if (rhigh >= rlow)
  {
#ifdef SR_MA_INSERTSORT
    /* mask covering the not-yet-sorted low bits, for the insertion sort */
    bit_mask = 0;
    if (rhigh - rlow + 1 <= key_radix_high) bit_mask = powof2_typed(rhigh - rlow + 1, slkey_pure_t);
    bit_mask = (bit_mask - 1) << rlow;
#endif

    /* recurse per class with the buffers swapped (sx now holds the data) */
    elem_assign(s, &xi);
    elem_assign(sx, &xj);
    for (i = 0; i < nclasses; i++)
    {
      xi.size = xj.size = c[i];
#ifdef SR_MA_INSERTSORT
      /* large classes recurse; small ones are finished by insertion sort,
         copying back if the parity says the data must end up in xi */
      if (c[i] > sort_radix_threshold_rec) rs_rec_ma_db(&xj, &xi, rhigh, rlow, rwidth, (!switchdb));
      else
      {
        if (c[i] > 1) sort_insert_bmask_kernel(&xj, &xi, bit_mask);
        if (switchdb) elem_ncopy(&xj, &xi, c[i]);
      }
      elem_add(&xi, c[i]);
      elem_add(&xj, c[i]);
    }
#else
      /* NOTE(review): in this #else path the brace that closes the for-body
         lives inside the #ifdef arm above, and xi/xj are never advanced —
         this branch looks like it cannot compile / work as written; confirm
         whether SR_MA_INSERTSORT is effectively always defined. */
      if (c[i] > 1) rs_rec_ma_db(&xj, &xi, rhigh, rlow, rwidth, (!switchdb));
#endif
  } else elem_ncopy(sx, s, s->size);  /* base case: copy result back into s */

  return 0;
}