/* * Internal function which adds digests to the defrag_list * Mallocs the nodes of defrag_list * Returns : * -1 : Error * number of digests found : success * */ static long build_defrag_list_from_nbtr(as_namespace *ns, ai_obj *acol, bt *nbtr, ulong nofst, ulong *limit, uint64_t * tot_found, cf_ll *gc_list) { int error = -1; btEntry *nbe; // STEP 1: go thru a portion of the nbtr and find to-be-deleted-PKs // TODO: a range query may be smarter then using the Xth Iterator btSIter *nbi = (nofst ? btGetFullXthIter(nbtr, nofst, 1, NULL, 0) : btGetFullRangeIter(nbtr, 1, NULL)); if (!nbi) { return error; } long found = 0; long processed = 0; while ((nbe = btRangeNext(nbi, 1))) { ai_obj *akey = nbe->key; int ret = as_sindex_can_defrag_record(ns, (cf_digest *) (&akey->y)); if (ret == AS_SINDEX_GC_SKIP_ITERATION) { *limit = 0; break; } else if (ret == AS_SINDEX_GC_OK) { bool create = (cf_ll_size(gc_list) == 0) ? true : false; objs_to_defrag_arr *dt; if (!create) { cf_ll_element * ele = cf_ll_get_tail(gc_list); dt = ((ll_sindex_gc_element*)ele)->objs_to_defrag; if (dt->num == SINDEX_GC_NUM_OBJS_PER_ARR) { create = true; } } if (create) { dt = as_sindex_gc_get_defrag_arr(); if (!dt) { *tot_found += found; return -1; } ll_sindex_gc_element * node; node = cf_malloc(sizeof(ll_sindex_gc_element)); node->objs_to_defrag = dt; cf_ll_append(gc_list, (cf_ll_element *)node); } cloneDigestFromai_obj(&(dt->acol_digs[dt->num].dig), akey); ai_objClone(&(dt->acol_digs[dt->num].acol), acol); dt->num += 1; found++; } processed++; (*limit)--; if (*limit == 0) break; } btReleaseRangeIterator(nbi); *tot_found += found; return processed; }
/*
 * Array flavour of the defrag-list builder: scans the digests stored in arr,
 * starting at index nofst, and appends every digest that may be defragged to
 * gc_list together with a clone of acol.
 *
 * limit is an in/out budget: it is decremented once per processed digest and
 * the scan stops when it reaches 0. It is forced to 0 when the validator
 * answers AS_SINDEX_GC_SKIP_ITERATION.
 *
 * Returns the number of digests processed, or -1 when no defrag array could
 * be obtained. In both cases *tot_found is incremented by the number of
 * digests added during this call.
 */
static long
build_defrag_list_from_arr(as_namespace *ns, ai_obj *acol, ai_arr *arr, long nofst, long *limit, uint64_t * tot_found, cf_ll *gc_list)
{
	long n_found = 0;
	long n_processed = 0;
	uint64_t validation_time_ns = 0;

	for (int i = nofst; i < arr->used; i++) {
		cf_digest *dig = (cf_digest *) &arr->data[i * CF_DIGEST_KEY_SZ];

		SET_TIME_FOR_SINDEX_GC_HIST(validation_time_ns);
		int ret = as_sindex_can_defrag_record(ns, dig);
		SINDEX_GC_HIST_INSERT_DATA_POINT(sindex_gc_validate_obj_hist, validation_time_ns);
		validation_time_ns = 0;

		if (ret == AS_SINDEX_GC_SKIP_ITERATION) {
			*limit = 0;
			break;
		}

		if (ret == AS_SINDEX_GC_OK) {
			objs_to_defrag_arr *dt;
			bool need_new = cf_ll_size(gc_list) == 0;

			// Reuse the tail array unless it is already full.
			if (!need_new) {
				ll_sindex_gc_element *tail = (ll_sindex_gc_element *)cf_ll_get_tail(gc_list);
				dt = tail->objs_to_defrag;
				need_new = (dt->num == SINDEX_GC_NUM_OBJS_PER_ARR);
			}

			if (need_new) {
				dt = as_sindex_gc_get_defrag_arr();
				if (!dt) {
					*tot_found += n_found;
					return -1;
				}
				ll_sindex_gc_element *node = cf_malloc(sizeof(ll_sindex_gc_element));
				node->objs_to_defrag = dt;
				cf_ll_append(gc_list, (cf_ll_element *)node);
			}

			memcpy(&(dt->acol_digs[dt->num].dig), dig, CF_DIGEST_KEY_SZ);
			ai_objClone(&(dt->acol_digs[dt->num].acol), acol);
			dt->num += 1;
			n_found++;
		}

		n_processed++;
		if (--(*limit) == 0) {
			break;
		}
	}

	*tot_found += n_found;
	return n_processed;
}
/*
 * Feeds digests from an nbtr into the query context's record list.
 *
 * With fullrng set the whole tree is walked. Otherwise the walk starts at the
 * digest saved in qctx->bdig (end-point of the LAST batch) and runs to the
 * tree's max key; an entry equal to that start key is skipped.
 *
 * When qctx->n_bdigs reaches qctx->bsize the walk stops and the current
 * position is saved: ikey (if non-NULL) is cloned into qctx->bkey and the
 * current digest into qctx->bdig.
 *
 * Returns  0 in case of success (also when no iterator could be set up; a
 *            warning is logged in that case)
 *         -1 in case of failure (btree_addsinglerec() returned non-zero)
 */
static int
add_recs_from_nbtr(as_sindex_metadata *imd, ai_obj *ikey, bt *nbtr, as_sindex_qctx *qctx, bool fullrng)
{
	ai_obj start_key, end_key;
	init_ai_obj(&start_key);
	init_ai_obj(&end_key);

	btSIter iter_storage;
	btSIter *iter;

	if (!fullrng) {
		// search from LAST batches end-point
		init_ai_objFromDigest(&start_key, &qctx->bdig);
		assignMaxKey(nbtr, &end_key);
		iter = btSetRangeIter(&iter_storage, nbtr, &start_key, &end_key, 1);
	}
	else {
		iter = btSetFullRangeIter(&iter_storage, nbtr, 1, NULL);
	}

	if (!iter) {
		cf_warning(AS_QUERY, "Could not find nbtr iterator.. skipping !!");
		return 0;
	}

	int rv = 0;
	btEntry *entry;

	while ((entry = btRangeNext(iter, 1))) {
		ai_obj *cur = entry->key;

		// FIRST can be REPEAT (last batch)
		if (!fullrng && ai_objEQ(&start_key, cur)) {
			continue;
		}

		if (btree_addsinglerec(imd, ikey, (cf_digest *)&cur->y, qctx->recl, &qctx->n_bdigs,
				qctx->can_partition_query, qctx->partitions_pre_reserved)) {
			rv = -1;
			break;
		}

		if (qctx->n_bdigs == qctx->bsize) {
			// Batch is full: remember where to resume.
			if (ikey) {
				ai_objClone(qctx->bkey, ikey);
			}
			cloneDigestFromai_obj(&qctx->bdig, cur);
			break;
		}
	}

	btReleaseRangeIterator(iter);
	return rv;
}
/*
 * Feeds every digest stored in arr into the query context's record list.
 *
 * Unlike the btree walker this never stops at the batch limit (see the
 * comment in the loop); once the loop ends qctx->nbtr_done is set and ikey
 * (if non-NULL) is cloned into qctx->bkey.
 *
 * Returns  0 in case of success
 *         -1 in case of failure (btree_addsinglerec() returned non-zero)
 */
static int
add_recs_from_arr(as_sindex_metadata *imd, ai_obj *ikey, ai_arr *arr, as_sindex_qctx *qctx)
{
	// Must be an int: a bool would turn the -1 failure code into 1.
	int ret = 0;

	for (int i = 0; i < arr->used; i++) {
		// Same argument list as the call in add_recs_from_nbtr().
		if (btree_addsinglerec(imd, ikey, (cf_digest *)&arr->data[i * CF_DIGEST_KEY_SZ], qctx->recl,
				&qctx->n_bdigs, qctx->can_partition_query, qctx->partitions_pre_reserved)) {
			ret = -1;
			break;
		}
		// do not break on hitting batch limit, if the tree converts to
		// bt from arr, there is no way to know which digest were already
		// returned when attempting subsequent batch. Return the entire
		// thing.
	}

	// mark nbtr as finished and copy the offset
	qctx->nbtr_done = true;
	if (ikey) {
		ai_objClone(qctx->bkey, ikey);
	}

	return ret;
}
/*
 * Aerospike Index interface to build a defrag_list.
 *
 * Walks pimd->ibtr from icol up to the tree's max key and, for every index
 * key, collects defrag candidates from its nbtr (btree or array form) into
 * gc_list until the budget `limit` is used up.
 *
 * Parameters:
 *   imd, pimd     - index / per-partition metadata; AS_SINDEX_ERR if either
 *                   is NULL
 *   icol          - in/out cursor: index key to start from. When icol->empty
 *                   is set it is initialised to the tree's min key; it is
 *                   overwritten with the key that was handled last.
 *   nofst         - in/out offset inside the nbtr of the current key. Advanced
 *                   by the processed count when the budget runs out, reset to
 *                   0 when a key's nbtr has been finished.
 *   limit         - maximum number of entries to process in this call
 *   tot_processed - incremented by the number of entries processed
 *   tot_found     - incremented by the number of digests added to gc_list
 *   gc_list       - list receiving the defrag candidates
 *
 * Returns :
 *  AS_SINDEX_DONE     ---> The current pimd has been scanned completely for defragging
 *  AS_SINDEX_CONTINUE ---> Current pimd still may have some candidate digest to be defragged
 *  AS_SINDEX_ERR      ---> Error. Abort this pimd. Also returned when the tree
 *                          is missing or empty, when no iterator could be
 *                          obtained, or when an index key carries no nbtr.
 *  AS_SINDEX_ERR_NO_MEMORY ---> get_iname_from_imd() returned NULL.
 *
 * Notes :  Caller has the responsibility to free the iterators.
 *          Requires a proper offset value from the caller.
 */
int
ai_btree_build_defrag_list(as_sindex_metadata *imd, as_sindex_pmetadata *pimd, ai_obj *icol, long *nofst, long limit, uint64_t * tot_processed, uint64_t * tot_found, cf_ll *gc_list)
{
	int ret = AS_SINDEX_ERR;

	if (!pimd || !imd) {
		return ret;
	}

	as_namespace *ns = imd->si->ns;
	if (!ns) {
		ns = as_namespace_get_byname((char *)imd->ns_name);
	}

	char *iname = get_iname_from_imd(imd);
	if (!iname) {
		ret = AS_SINDEX_ERR_NO_MEMORY;
		return ret;
	}

	// pimd was already validated above; only the tree needs checking here.
	if (!pimd->ibtr || !pimd->ibtr->numkeys) {
		goto END;
	}

	//Entry is range query, FROM previous icol TO maxKey(ibtr)
	if (icol->empty) {
		assignMinKey(pimd->ibtr, icol); // init first call
	}

	ai_obj iH;
	assignMaxKey(pimd->ibtr, &iH);

	btEntry *be = NULL;
	btSIter *bi = btGetRangeIter(pimd->ibtr, icol, &iH, 1);
	if (!bi) {
		goto END;
	}

	while ( true ) {
		be = btRangeNext(bi, 1);
		if (!be) {
			ret = AS_SINDEX_DONE;
			break;
		}

		ai_obj *acol = be->key;
		ai_nbtr *anbtr = be->val;
		long processed = 0;

		if (!anbtr) {
			// ret is still AS_SINDEX_ERR at this point.
			break;
		}

		if (anbtr->is_btree) {
			processed = build_defrag_list_from_nbtr(ns, acol, anbtr->u.nbtr, *nofst, &limit, tot_found, gc_list);
		}
		else {
			processed = build_defrag_list_from_arr(ns, acol, anbtr->u.arr, *nofst, &limit, tot_found, gc_list);
		}

		if (processed < 0) {
			// error .. abort everything.
			cf_detail(AS_SINDEX, "build_defrag_list returns an error. Aborting defrag on current pimd");
			ret = AS_SINDEX_ERR;
			break;
		}
		*tot_processed += processed;

		// This tree may have some more digest to defrag
		if (limit == 0) {
			*nofst = *nofst + processed;
			ai_objClone(icol, acol);
			cf_detail(AS_SINDEX, "Current pimd may need more iteration of defragging.");
			ret = AS_SINDEX_CONTINUE;
			break;
		}

		// We have finished this tree. Yet we have not reached our limit to defrag.
		// Goes to next iteration
		*nofst = 0;
		ai_objClone(icol, acol);
	}

	btReleaseRangeIterator(bi);

END:
	cf_free(iname);

	return ret;
}