Exemple #1
0
/* Emit the SVs of every major bucket into dest, bucket after bucket.
 *
 *   by_bucket_cursor   - per-bucket element pointer.  For a bucket holding
 *                        more than one element the pointer is handed to
 *                        ms_do_mergesort() and then moved back by count-1
 *                        elements before the bucket is read from index 0.
 *   major_bucket_count - number of entries in by_bucket_cursor and
 *                        count_by_bucket.
 *   count_by_bucket    - number of elements in each bucket.
 *   dest               - output array; receives one SV* per element.
 *
 * Within a bucket, neighbouring elements whose mse_key values are equal
 * cannot be told apart by the key alone, so each such run is re-sorted in
 * place in dest with Perl's own sort, using Perl_sv_cmp as comparator. */
static void
sb__sort_within_major_buckets(
                 ms_elt_t **by_bucket_cursor, int major_bucket_count,
                                             int* count_by_bucket, SV** dest)
{
    int bucket;

    for ( bucket=0 ; bucket<major_bucket_count ; bucket++ ) {
        const int n = count_by_bucket[bucket];
        ms_elt_t *elts;
        U32 run_key;
        int run_len, k;

        /* Nothing to emit for an empty bucket. */
        if (n == 0)
            continue;

        /* A lone element needs no sorting at all. */
        if (n <= 1) {
            *dest++ = by_bucket_cursor[bucket][0].mse_sv;
            continue;
        }

        elts = by_bucket_cursor[bucket];
        ms_do_mergesort(elts, n);
        elts -= n-1;

        /* run_len counts the elements at the tail of what has been copied so
         * far that share run_key; a value above 1 means a run of ties. */
        run_key = elts[0].mse_key;
        run_len = 1;
        *dest++ = elts[0].mse_sv;

        for ( k=1 ; k<n ; k++ ) {
            *dest++ = elts[k].mse_sv;

            if (elts[k].mse_key == run_key) {
                run_len++;
                continue;
            }

            /* Key changed: the element just written sits at dest-1 and the
             * finished run lies directly before it. */
            if (run_len > 1) {
                sortsv((dest-1)-run_len, run_len, Perl_sv_cmp);
            }
            run_key = elts[k].mse_key;
            run_len = 1;
        }

        /* The bucket may end while a run of ties is still open. */
        if (run_len > 1) {
            sortsv(dest-run_len, run_len, Perl_sv_cmp);
        }
    }
}
    /* Run a query and return a list of ranked results.
     * Call this from Perl like `$index->fetch(['water', 'jar'])`. The result
     * will be an arrayref, with each entry of it being an arrayref of the form
     * [id, rank]. They will be sorted by their rank in descending order, so the
     * most relevant document will be at the top. */
    SV* fetch(SV* tokens) const
    {
        /* `tokens` has to be a reference to a Perl array; anything else
         * croaks instead of crashing in SvRV/av_fetch.  The check is done
         * before any C++ object with a destructor exists in this frame,
         * because croak() leaves via longjmp and would skip destructors. */
        if (!tokens || !SvROK(tokens) || SvTYPE(SvRV(tokens)) != SVt_PVAV)
            croak("fetch: expected an array reference of tokens");

        AV* av = reinterpret_cast<AV*>(SvRV(tokens));
        const SSize_t last_index = av_top_index(av);

        /* Count how many times each token occurs in the query. */
        std::unordered_map<std::string, int> query;
        for (SSize_t i = 0; i <= last_index; ++i)
        {
            /* av_fetch() yields NULL for slots that were never assigned;
             * such holes carry no token and are skipped. */
            SV** slot = av_fetch(av, i, 0);
            if (!slot || !*slot)
                continue;

            std::string token = string_from_sv(*slot);
            ++query[token];
        }

        AV* results = newAV();
        for (const auto& id2rank : search(query))
        {
            /* The score is divided by the value stored in `lengths` for this
             * id.  An id with no entry there cannot be scored, so it is
             * skipped rather than dereferencing end(). */
            const auto length_it = lengths.find(id2rank.first);
            if (length_it == lengths.end())
                continue;
            const double length = length_it->second;

            /* Each result is the two-element array [id, rank]. */
            AV* entry = newAV();
            av_push(entry, newSViv(id2rank.first));
            av_push(entry, newSVnv(id2rank.second / length));

            /* newRV_noinc: `results` takes over the only reference to entry. */
            av_push(results, newRV_noinc(reinterpret_cast<SV*>(entry)));
        }

        /* Order the entries in place with the sort_ratings comparator. */
        sortsv(AvARRAY(results), av_top_index(results) + 1, sort_ratings);
        return newRV_noinc(reinterpret_cast<SV*>(results));
    }