Ejemplo n.º 1
0
/*
 * Tells whether `word` appears in the stopword list.
 *
 * If the global `stopwords` list is still unset, load_stopwords() is called
 * first. The word is then looked up with find_str() over the index range
 * 0 .. nr_stopwords - 1, using sizeof(char *) as the element size.
 *
 * Returns 1 when find_str() reports anything other than -1, and 0 otherwise.
 */
int is_stopword(char *word) {
    /* load the list on first use */
    if (stopwords == NULL)
        load_stopwords();

    if (find_str(stopwords, sizeof(char *), word, 0, nr_stopwords - 1) == -1)
        return 0;

    return 1;
}
Ejemplo n.º 2
0
/*
 * Initializes the English language plugin on `lang`.
 *
 * Sets the language code to "eng" and the full name to "english", allocates
 * the plugin-private iplus1_english_t in lang->param, installs the parse
 * callback, creates the stemmer with sb_stemmer_new() and finally calls
 * load_stopwords().
 *
 * Returns IPLUS1_SUCCESS on success. On any failure (out of memory, stemmer
 * not available) returns IPLUS1_FAIL; in that case lang->full_lang and
 * lang->param have been freed and reset to NULL, so nothing allocated here
 * is leaked and a later free() of those fields is harmless.
 */
int init(iplus1_lang_t* lang)
{
    iplus1_english_t* eng;

    /* NOTE(review): assumes lang->lang has room for "eng" plus the terminator -- confirm. */
    strcpy(lang->lang, "eng");

    lang->full_lang = strdup("english");
    lang->param = malloc(sizeof(iplus1_english_t));
    if (lang->full_lang == NULL || lang->param == NULL)
        goto fail;
    lang->parse = parse;

    eng = (iplus1_english_t*)lang->param;
    if ((eng->stemmer = sb_stemmer_new("eng", "UTF_8")) == NULL) {
        fprintf(stderr, "could not find english stemmer\n");
        goto fail;
    }

    load_stopwords(eng);
    return IPLUS1_SUCCESS;

fail:
    /* free(NULL) is a no-op, so this is correct whichever step failed */
    free(lang->param);
    lang->param = NULL;
    free(lang->full_lang);
    lang->full_lang = NULL;
    return IPLUS1_FAIL;
}
Ejemplo n.º 3
0
/*
 * Initializes the German language plugin on `lang`.
 *
 * Sets the language code to "deu" and the full name to "german", allocates
 * the plugin-private iplus1_german_t in lang->param, installs the parse
 * callback, creates the stemmer with sb_stemmer_new() and finally calls
 * load_stopwords().
 *
 * Returns IPLUS1_SUCCESS on success. On any failure (out of memory, stemmer
 * not available) returns IPLUS1_FAIL; in that case lang->full_lang and
 * lang->param have been freed and reset to NULL, so nothing allocated here
 * is leaked and a later free() of those fields is harmless.
 */
int init(iplus1_lang_t* lang)
{
    iplus1_german_t* deu;

    /* NOTE(review): assumes lang->lang has room for "deu" plus the terminator -- confirm. */
    strcpy(lang->lang, "deu");

    lang->full_lang = strdup("german");
    lang->param = malloc(sizeof(iplus1_german_t));
    if (lang->full_lang == NULL || lang->param == NULL)
        goto fail;
    lang->parse = parse;

    deu = (iplus1_german_t*)lang->param;
    if ((deu->stemmer = sb_stemmer_new("deu", "UTF_8")) == NULL) {
        fprintf(stderr, "could not find german stemmer\n");
        goto fail;
    }

    load_stopwords(deu);
    return IPLUS1_SUCCESS;

fail:
    /* free(NULL) is a no-op, so this is correct whichever step failed */
    free(lang->param);
    lang->param = NULL;
    free(lang->full_lang);
    lang->full_lang = NULL;
    return IPLUS1_FAIL;
}
Ejemplo n.º 4
0
/*
 * Initializes the Portuguese language plugin on `lang`.
 *
 * Sets the language code to "por" and the full name to "portugese",
 * allocates the plugin-private iplus1_portugese_t in lang->param, installs
 * the parse callback, creates the stemmer with sb_stemmer_new() and finally
 * calls load_stopwords().
 *
 * Returns IPLUS1_SUCCESS on success. On any failure (out of memory, stemmer
 * not available) returns IPLUS1_FAIL; in that case lang->full_lang and
 * lang->param have been freed and reset to NULL, so nothing allocated here
 * is leaked and a later free() of those fields is harmless.
 *
 * NOTE(review): "portugese" is a misspelling of "portuguese". The strings
 * are left as they were because the type name uses the same spelling and
 * other code may match on the full language name -- confirm before renaming.
 */
int init(iplus1_lang_t* lang)
{
    iplus1_portugese_t* por;

    /* NOTE(review): assumes lang->lang has room for "por" plus the terminator -- confirm. */
    strcpy(lang->lang, "por");

    lang->full_lang = strdup("portugese");
    lang->param = malloc(sizeof(iplus1_portugese_t));
    if (lang->full_lang == NULL || lang->param == NULL)
        goto fail;
    lang->parse = parse;

    por = (iplus1_portugese_t*)lang->param;
    if ((por->stemmer = sb_stemmer_new("por", "UTF_8")) == NULL) {
        fprintf(stderr, "could not find portugese stemmer\n");
        goto fail;
    }

    load_stopwords(por);
    return IPLUS1_SUCCESS;

fail:
    /* free(NULL) is a no-op, so this is correct whichever step failed */
    free(lang->param);
    lang->param = NULL;
    free(lang->full_lang);
    lang->full_lang = NULL;
    return IPLUS1_FAIL;
}
Ejemplo n.º 5
0
/* Command prefixes understood by the interactive prompt. */
#define CMD_EXIT    "exit"
#define CMD_REBUILD "rebuild index"
#define CMD_SEARCH  "search for "
#define CMD_ADD     "add file "
#define CMD_REMOVE  "remove file "

/*
 * Returns a newly allocated copy of the part of `command` that follows its
 * first `prefix_len` characters, or NULL (after printing an error) when the
 * allocation fails. `command` must be at least `prefix_len` characters long;
 * the caller frees the result.
 */
static char *copy_argument(const char *command, size_t prefix_len) {
    size_t size = strlen(command) - prefix_len + 1; /* +1 for the terminator */
    char *argument = malloc(size);

    if (!argument) {
        fprintf(stderr, "Error: out of memory\n");
        return NULL;
    }

    memcpy(argument, command + prefix_len, size);
    return argument;
}

/*
 * Interactive front end for the index.
 *
 * Loads the stopwords and the index, then reads commands line by line from
 * stdin until "exit" is entered or no further line can be read:
 *
 *   exit                  leave the program
 *   rebuild index         rebuild_index() on the loaded index
 *   search for <query>    search_index() and list the matching documents
 *   add file <file>       add_file() to the index
 *   remove file <file>    look the file up by name and remove_file() it
 *
 * Any other input is ignored. Stopwords and index are released before
 * returning 0.
 */
int main(int argc, char *argv[]) {
    (void) argc;
    (void) argv;

    load_stopwords();
    index_p index = load_index();

    int done = 0;
    while (!done) {
        printf(" > ");
        fflush(stdout); /* the prompt has no newline, so push it out before blocking */

        char *command = read_line(stdin);
        if (!command) {
            /* nothing could be read (e.g. end of input): stop instead of
             * handing NULL to strcmp() */
            break;
        }

        if (!strcmp(command, CMD_EXIT)) {
            // exit command
            done = 1;
            printf("Exit requested..\n");

        } else if (!strcmp(command, CMD_REBUILD)) {
            // rebuild index command
            rebuild_index(index);

        } else if (starts_with(command, CMD_SEARCH)) {
            // search for <search_query> command
            char *query = copy_argument(command, sizeof(CMD_SEARCH) - 1);

            if (query) {
                index_p result = search_index(&index, query);

                /* NOTE(review): the text below promises at most 10 results,
                 * but nothing in this loop enforces a limit -- presumably
                 * search_index() caps the result; confirm. */
                printf("Results (showing no more than 10, there might be more):\n");
                if (result) {
                    // print result
                    int count = 0;
                    indexed_word_p w = result->words;
                    if (!w) {
                        printf("No documents found for search term %s\n", query);
                    }

                    while (w) {
                        printf("Documents containing %s:\n", w->stem);

                        /* count keeps running across words so every listed
                         * document gets a distinct number */
                        int i;
                        for (i = 0; i < w->nr_docs; i++, count++) {
                            printf(" [%d] %s\n", count, result->documents[w->documents[i].id].name);
                        }

                        w = w->next;
                    }

                    close_index(result);
                } else {
                    printf("No documents found for search term %s\n", query);
                }

                free(query);
            }

        } else if (starts_with(command, CMD_ADD)) {
            // add file <file> command
            char *file = copy_argument(command, sizeof(CMD_ADD) - 1);

            if (file) {
                /* add_file() hands back the index pointer to keep using */
                index = add_file(index, file);
                free(file);
            }

        } else if (starts_with(command, CMD_REMOVE)) {
            // remove file <file> command
            char *file = copy_argument(command, sizeof(CMD_REMOVE) - 1);

            if (file) {
                // obtain document id a.k.a. index in filebase
                int doc_id = find_str(&index->documents[0].name, sizeof(indexed_document_t), file, 0, index->nr_docs - 1);

                if (doc_id < 0) {
                    printf("Error: %s is not in the filebase!\n", file);
                } else {
                    remove_file(index, doc_id);
                }

                free(file);
            }
        }

        free(command);
    }

    // release memory
    release_stopwords();
    close_index(index);

    return 0;
}