Ejemplo n.º 1
0
int
ExpressionEncoder::encode(const IndexingOptions& options,
                          const CmmlToken* expression,
                          vector<encoded_token_t>* encodedFormula,
                          ExpressionInfo* expressionInfo) {
    int rv = 0;
    stack<const CmmlToken*> dfs_stack;
    MeaningDictionary namedVarDictionary;
    int anonVarId = 0;

    encodedFormula->clear();

    dfs_stack.push(expression);
    while (!dfs_stack.empty()) {
        const CmmlToken* token = dfs_stack.top();
        dfs_stack.pop();
        encoded_token_t encoded_token;

        if (token->isVar()) {
            string qvarName = token->getVarName();
            encoded_token.arity = 1;
            if (qvarName == "") {
                encoded_token.id = _getAnonVarOffset() + anonVarId;
            } else {
                encoded_token.id = _getNamedVarOffset() +
                        namedVarDictionary.put(qvarName);
                if (expressionInfo != NULL) {
                    expressionInfo->qvarNames.push_back(qvarName);
                    expressionInfo->qvarXpaths.push_back(
                                token->getXpathRelative());
                }
            }
        } else {
            encoded_token.arity = token->getArity();
            if (options.renameCi && token->getTag() == "ci") {
                encoded_token.id = _getCiMeaning((token));
            } else {
                encoded_token.id = _getConstantEncoding(token->getMeaning());
            }
            if (encoded_token.id == MeaningDictionary::KEY_NOT_FOUND) {
                rv = -1;
            }
        }
        encodedFormula->push_back(encoded_token);

        // Replenish stack
        for (auto rIt = token->getChildNodes().rbegin();
             rIt != token->getChildNodes().rend(); rIt ++) {
            dfs_stack.push(*rIt);
        }
    }

    return rv;
}
Ejemplo n.º 2
0
/**
 * Builds a MeaningDictionary holding a fixed set of meanings.
 *
 * The meanings are inserted with put() in the order listed below.
 *
 * @return the populated dictionary
 */
MeaningDictionary get_meaning_dict() {
    static const char* const kMeanings[] = {
        "apply#",
        "f#",
        "h#",
        "t#",
        "g#",
        "cn#3.5",
    };

    MeaningDictionary dict;
    for (const char* meaning : kMeanings) {
        dict.put(meaning);
    }

    return dict;
}
Ejemplo n.º 3
0
int main(int argc, char* argv[]) {
    mws::daemon::Config config;
    FlagParser::addFlag('I', "index-path",              FLAG_REQ, ARG_REQ);
    FlagParser::addFlag('H', "harvest-path",            FLAG_REQ, ARG_REQ);
    FlagParser::addFlag('c', "enable-ci-renaming",      FLAG_OPT, ARG_NONE);
    FlagParser::addFlag('e', "harvest-file-extension",  FLAG_OPT, ARG_REQ);
    FlagParser::addFlag('r', "recursive",               FLAG_OPT, ARG_NONE);

    if ((FlagParser::parse(argc, argv)) != 0) {
        fprintf(stderr, "%s", FlagParser::getUsage().c_str());
        return EXIT_FAILURE;
    }

    // harvest path
    config.dataPath = FlagParser::getArg('H').c_str();
    config.indexingOptions.renameCi = FlagParser::hasArg('c');
    config.recursive = FlagParser::hasArg('r');
    string indexPath = FlagParser::getArg('I').c_str();

    // harvest file extension
    if (FlagParser::hasArg('e')) {
        config.harvestFileExtension = FlagParser::getArg('e');
    } else {
        config.harvestFileExtension = DEFAULT_MWS_HARVEST_SUFFIX;
    }

    index_handle_t* data;
    MeaningDictionary* meaningDictionary;
    FormulaDb* formulaDb;
    LevFormulaDb* fmdb = new LevFormulaDb();
    string fmdbPath = indexPath + "/formula.db";

    try {
        fmdb->open(fmdbPath.c_str());
        formulaDb = fmdb;
    }
    catch(const exception &e) {
        PRINT_WARN("Initializing database: %s\n", e.what());
        return EXIT_FAILURE;
    }

    /*
     * Initializing data
     */
    string ms_path = indexPath + "/memsector.dat";
    memsector_handle_t msHandle;
    memsector_load(&msHandle, ms_path.c_str());

    data = new index_handle_t;
    *data = msHandle.index;

    /*
     * Initializing meaningDictionary
     */
    meaningDictionary = new MeaningDictionary();
    filebuf fb;
    istream os(&fb);
    fb.open((indexPath + "/meaning.dat").c_str(), ios::in);
    meaningDictionary->load(os);
    fb.close();

    common::utils::FileCallback fileCallback =
            [&] (const std::string& path, const std::string& prefix) {
        UNUSED(prefix);
        if (common::utils::hasSuffix(path, config.harvestFileExtension)) {
            printf("Loading %s... ", path.c_str());
            int fd = open(path.c_str(), O_RDONLY);
            if (fd < 0) {
                return -1;
            }
            ParseResult parseReturn = parseMwsHarvestFromFd(config, data,
                                                            meaningDictionary,
                                                            formulaDb, fd);
            writeHitsToJSON(parseReturn.data, parseReturn.hits, path);
        } else {
            printf("Skipping \"%s\": bad extension\n", path.c_str());
        }

        return 0;
    };
    common::utils::DirectoryCallback shouldRecurse =
            [](const std::string partialPath) {
        UNUSED(partialPath);
        return true;
    };

    printf("Loading harvest files...\n");
    if (config.recursive) {
        if (common::utils::foreachEntryInDirectory(config.dataPath,
                                                   fileCallback,
                                                   shouldRecurse)) {
            fprintf(stderr, "Error in foreachEntryInDirectory (recursive)");
            return EXIT_FAILURE;
        }
    } else {
        if (common::utils::foreachEntryInDirectory(config.dataPath,
                                                       fileCallback)) {
            fprintf(stderr, "Error in foreachEntryInDirectory (non-recursive)");
            return EXIT_FAILURE;
        }
    }

    return EXIT_SUCCESS;
}