Esempio n. 1
0
/**
 * Compute one hash value per word.
 *
 * Each hash starts at the word's length and adds assoc[c] for the character
 * c found at every position in pos that lies inside the word.
 *
 * @param strs  words to hash
 * @param pos   character positions sampled from each word
 * @param assoc per-character weights, indexed by character code
 * @return one hash value per entry of strs, in the same order
 */
ivec_t do_hash(const words_t &strs, const ivec_t &pos, const ivec_t &assoc)
{
    ivec_t ivec;
    ivec.reserve(strs.size());
    for(const auto &s:strs) {
        int t = s.length();
        for(auto p:pos) {
            // Positions outside the word contribute nothing
            if(p < 0 || p >= (int)s.size())
                continue;
            // char may be signed; go through unsigned char so bytes >= 0x80
            // can never become a negative index
            const unsigned c = (unsigned char)s[p];
            // Characters without a table entry are ignored rather than
            // read past the end of assoc
            if(c < assoc.size())
                t += assoc[c];
        }
        ivec.push_back(t);
    }
    return ivec;
}
Esempio n. 2
0
/**
 * Greedily search for per-character weights that reduce hash collisions.
 *
 * The weight table has 127 entries, all starting at 0. For up to four
 * passes, every distinct character found in strs is tried with each weight
 * in [0, 100); the weight giving the fewest duplicates reported by
 * count_dups(do_hash(strs, pos, assoc)) is kept (the lowest such weight on
 * ties). The search stops early once a pass no longer lowers the duplicate
 * count.
 *
 * Characters whose code falls outside the table are skipped, since there is
 * no slot to tune for them.
 *
 * @param strs words that should hash to distinct values
 * @param pos  character positions sampled by do_hash
 * @return the 127-entry weight table
 */
ivec_t find_assoc(const words_t &strs, const ivec_t &pos)
{
    // Size of the weight table: one slot per character code in [0, N)
    const int N = 127;

    ivec_t assoc;
    assoc.reserve(N);
    for(int i=0; i<N; ++i)
        assoc.push_back(0);

    // Distinct characters appearing in the words, in first-seen order
    std::vector<char> useful_chars;
    for(const auto &w:strs) {
        for(auto c:w) {
            // Indexing assoc with this character would be out of bounds
            const int code = c;
            if(code < 0 || code >= N)
                continue;
            if(!has(useful_chars, c))
                useful_chars.push_back(c);
        }
    }

    int current_dups = strs.size();
    int assoc_best = -1;
    int assoc_best_val = INT_MAX;
    for(int k=0; k<4; ++k)
    {
        for(int i:useful_chars) {
            assoc_best_val = INT_MAX;
            for(int j=0; j<100; ++j) {
                assoc[i] = j;
                auto hashed = do_hash(strs, pos, assoc);
                int d = count_dups(hashed);
                if(d < assoc_best_val) {
                    assoc_best_val = d;
                    assoc_best = j;
                }
            }
            assoc[i] = assoc_best;
        }
        // assoc_best_val now holds the duplicate count of the table as left
        // by the last character; stop when the pass brought no improvement
        if(assoc_best_val >= current_dups)
            break;
        current_dups = assoc_best_val;
    }
    return assoc;
}
Esempio n. 3
0
/**
 * Greedily choose character positions whose hash separates all words.
 *
 * Candidate positions range over [0, longest word length). Each round tries
 * every position not yet chosen, appended to the current selection, and
 * keeps the one yielding the fewest duplicates from
 * count_dups(do_hash(strs, selection)). Rounds continue while the duplicate
 * count strictly decreases.
 *
 * @param strs words to separate
 * @return the chosen positions, or an empty vector if the final selection
 *         still leaves duplicates
 */
ivec_t find_pos(words_t &strs)
{
    // The longest word bounds the positions worth trying
    int N = 0;
    for(const auto &w:strs)
        N = rtosc_max(N,w.length());

    ivec_t chosen;
    int remaining_dups = strs.size();
    int best_index = -1;
    int best_dups = INT_MAX;

    bool improved = false;
    do {
        for(int cand=0; cand<N; ++cand) {
            if(!has(chosen, cand)) {
                ivec_t trial = chosen;
                trial.push_back(cand);
                auto trial_hash = do_hash(strs, trial);
                int dups = count_dups(trial_hash);
                if(dups < best_dups) {
                    best_dups = dups;
                    best_index = cand;
                }
            }
        }

        // best_dups carries over between rounds, so only a strictly better
        // candidate found in this round counts as progress
        improved = best_dups < remaining_dups;
        if(improved) {
            remaining_dups = best_dups;
            chosen.push_back(best_index);
        }
    } while(improved);

    // A selection that still collides is reported as "no positions"
    auto final_hash = do_hash(strs, chosen);
    int final_dups = count_dups(final_hash);
    if(final_dups != 0)
        chosen.clear();
    return chosen;
}
Esempio n. 4
0
/**
 * Look up the word that should follow search_words.
 *
 * Binds search_words to the prepared stmt_get_nexts statement, steps through
 * the result and collects every row as a Snippet. If any snippets were
 * found, selector picks one and its last word is stored in next. If none
 * were found, the search is retried with the first search word dropped; once
 * fewer than two search words remain and nothing matches, next is set to
 * IBackend::LINE_END.
 *
 * @param state        passed through to selector (and to scoring in debug output)
 * @param selector     chooses one snippet among the candidates
 * @param scorer       passed through to selector
 * @param search_words words to match against
 * @param next         output: the selected word, or IBackend::LINE_END
 * @return false if binding failed or sqlite3_step reported an error at this
 *         or any shorter search level, true otherwise
 */
bool marky::Backend_SQLite::get_next(const State& state, selector_t selector,
        scorer_t scorer, const words_t& search_words, word_t& next) {
#ifdef READ_DEBUG_ENABLED
    DEBUG("get_next(%s)", str(search_words).c_str());
#endif
    if (!bind_words(stmt_get_nexts, 1, search_words)) {
        sqlite3_clear_bindings(stmt_get_nexts);
        sqlite3_reset(stmt_get_nexts);
        return false;
    }

    bool ok = true;
    snippets_ptr_t snippets(new snippet_ptr_set_t);
    for (;;) {
        int step = sqlite3_step(stmt_get_nexts);
        bool done = false;
        switch (step) {
            case SQLITE_DONE:
                done = true;
                break;
            case SQLITE_ROW:
                {
                    /* column 0 holds the packed words, columns 1-3 the
                     * integer fields handed to the Snippet constructor */
                    words_t words;
                    unpack((const char*)sqlite3_column_text(stmt_get_nexts, 0), words);
                    snippet_t snippet(new Snippet(words,
                                    sqlite3_column_int64(stmt_get_nexts, 1),
                                    sqlite3_column_int64(stmt_get_nexts, 2),
                                    sqlite3_column_int64(stmt_get_nexts, 3)));
                    snippets->insert(snippet);
                    break;
                }
            default:
                ok = false;
                ERROR("Error when parsing response to '%s': %d/%s",
                        QUERY_GET_NEXTS, step, sqlite3_errmsg(db));
                break;
        }
        if (!ok || done) {
            break;
        }
    }
    /* leave the prepared statement ready for the next call (including the
     * recursive one below) */
    sqlite3_clear_bindings(stmt_get_nexts);
    sqlite3_reset(stmt_get_nexts);

    if (snippets->empty()) {
        if (search_words.size() >= 2) {
            words_t search_words_shortened(++search_words.begin(), search_words.end());
#ifdef READ_DEBUG_ENABLED
            DEBUG("  get_next -> %s", str(search_words_shortened).c_str());
#endif
            /* recurse with shorter search; a step error at this level must
             * still be reported even if the shorter search succeeds */
            const bool shorter_ok =
                get_next(state, selector, scorer, search_words_shortened, next);
            return ok && shorter_ok;
        } else {
#ifdef READ_DEBUG_ENABLED
            DEBUG("    next_snippet -> NOTFOUND");
#endif
            next = IBackend::LINE_END;
        }
    } else {
        const words_t& next_snippet = selector(*snippets, scorer, state)->words;
#ifdef READ_DEBUG_ENABLED
        for (snippet_ptr_set_t::const_iterator siter = snippets->begin();
             siter != snippets->end(); ++siter) {
            DEBUG("  nexts%s = snippet(%s, %lu)", str(search_words).c_str(),
                    str((*siter)->words).c_str(), (*siter)->score(scorer, state));
        }
        DEBUG("    next_snippet -> %s", str(next_snippet).c_str());
#endif
        next = next_snippet.back();
    }
    return ok;
}