/* Run a query and return a list of ranked results.
 * Call this from Perl like `$index->fetch(['water', 'jar'])`. The result
 * will be an arrayref, with each entry of it being an arrayref of the form
 * [id, rank]. They will be sorted by their rank in descending order, so the
 * most relevant document will be at the top.
 *
 * `tokens` must be a reference to an array; any other value croaks instead
 * of being dereferenced blindly. Elements for which av_fetch() yields NULL
 * (e.g. holes in a sparse array) are skipped. Repeated tokens are counted,
 * and the per-token counts are what gets handed to search().
 *
 * Each rank is the score produced by search() divided by the value stored
 * in `lengths` for that document id.
 */
SV* fetch(SV* tokens) const {
    // Validate first: croak() leaves this frame via longjmp, so it must run
    // before any C++ object with a destructor has been constructed.
    if (tokens == nullptr || !SvROK(tokens) ||
        SvTYPE(SvRV(tokens)) != SVt_PVAV) {
        croak("fetch: tokens must be an array reference");
    }
    AV* av = reinterpret_cast<AV*>(SvRV(tokens));

    // Collapse the token list into token -> occurrence count.
    std::unordered_map<std::string, int> query;
    const SSize_t top = av_top_index(av);
    for (SSize_t i = 0; i <= top; ++i) {
        SV** slot = av_fetch(av, i, 0);
        if (slot == nullptr) {
            continue;  // nothing stored at this index
        }
        ++query[string_from_sv(*slot)];
    }

    AV* results = newAV();
    for (const auto& id2rank : search(query)) {
        AV* entry = newAV();
        // NOTE(review): assumes every id returned by search() has an entry
        // in `lengths` (add_document() creates one per id) - confirm.
        double length = lengths.find(id2rank.first)->second;
        av_push(entry, newSViv(id2rank.first));
        av_push(entry, newSVnv(id2rank.second / length));
        // newRV_noinc: the reference takes over the only refcount of entry.
        av_push(results, newRV_noinc(reinterpret_cast<SV*>(entry)));
    }

    sortsv(AvARRAY(results), av_top_index(results) + 1, sort_ratings);
    return newRV_noinc(reinterpret_cast<SV*>(results));
}
/* Add a document with the given ID and arrayref of tokens to the index. * Each time this is called, the given ID must be greater than the previous * one. The IDs need not be sequential, however. * Call this from Perl like `$index->add_document(123, ['cup', 'tea'])`. */ void add_document(int id, SV* tokens) { std::unordered_map<std::string, int> vec; AV* av = reinterpret_cast<AV*>(SvRV(tokens)); for (int i = 0; i <= av_top_index(av); ++i) { std::string token = string_from_sv(*av_fetch(av, i, 0)); ++vec[token]; } for (const auto& token2tf : vec) { index[token2tf.first].push_back({id, token2tf.second}); } lengths[id] = 0; }
static void xs_new(pTHX_ SV *cv) { dXSARGS; if (items < 1) croak("Usage: class, ref"); #ifndef MULTIPLICITY AV *slots = CvXSUBANY(cv).any_ptr; #else MAGIC *mg = mg_findext(cv, PERL_MAGIC_ext, &ATTRS_TBL); AV *slots = (AV *)mg->mg_obj; #endif SV *class = ST(0); HV *hash = newHV(); SV *obj = sv_2mortal(newRV_noinc((SV *)hash)); // don't move to the end(leaks) SV **args; // uniq args int args_count = args_to_uniq(&ST(1), items - 1, &args); // skip 1(class) int slots_count = av_top_index(slots) + 1; for (int i = 0; i < slots_count; i++) { // NEXT_SLOT: ECAslot *slot = sv2slot(av_fetch_or_croak(slots, i)); // iterage args, null if matched for (int j = 0; j < args_count; j += 2) { SV *tmp = args[j]; if (!tmp) continue; // already matched if (!sv_cmp(tmp, slot->key)) { if (slot->check) do_check(slot->check, args[j + 1], slot->key); hv_he_store_or_croak(hash, slot->key, args[j + 1]); args[j] = NULL; goto NEXT_SLOT; args[j] = NULL; // mark as consumed } } // slot not found in passed args, decide what to do if (slot->type == ECA_REQUIRED) { croak("Attribute \"%s\" is required", SvPV_nolen(slot->key)); } else if (slot->type == ECA_DEFAULT) { hv_he_store_or_croak(hash, slot->key, slot->value); } else if (slot->type == ECA_DEFAULT_CODE) { invoke_and_store(class, slot->value, hash, slot->key); } NEXT_SLOT:; // simulate continue label }
/* Make `my_perl` the current interpreter context, then return what
 * av_top_index() reports for `av`, converted to I32. */
I32 p5_av_top_index(PerlInterpreter *my_perl, AV *av) {
    I32 top_index;

    PERL_SET_CONTEXT(my_perl);
    top_index = av_top_index(av);
    return top_index;
}
/* Return what av_top_index() reports for `av`, converted to I32.
 * No interpreter context switch is performed in this variant.
 * NOTE(review): `my_perl` is not referenced by name here; presumably the
 * av_top_index() macro picks it up implicitly on multiplicity builds -
 * confirm. */
I32 p5_av_top_index(PerlInterpreter *my_perl, AV *av) {
    I32 top_index;

    top_index = av_top_index(av);
    return top_index;
}