/*
 * Builds the index for file_path and writes it to disk. Shared entry point
 * for the command line and the GUI.
 *
 * Two files are written, both named by appending a suffix to the OEM code
 * page form of file_path:
 *   file_path + NEXT_INDEX_DB_NAME  - the secondary index
 *   file_path + INDEX_DB_NAME       - the primary index
 *
 * Returns 0 once the index has been built and both writes were attempted
 * (the outcome of each write is only reported on stdout), or -1 when
 * file_path is NULL, cannot be converted, or the resulting file names would
 * not fit in MAX_PATH bytes.
 *
 * NOTE(review): temp_index and temp_next_index are never released here;
 * whether the caller is expected to free them is not visible from this
 * function - confirm against the helpers that create them.
 */
int create_index_on(TCHAR * file_path)
{
	char mutil_file_path[MAX_PATH]={0};
	char index_file_name[MAX_PATH]={0};
	char next_index_file_name[MAX_PATH]={0};

	if (file_path==NULL)
	{
		printf("索引路径无效\n");
		return -1;
	}

	/* WideCharToMultiByte returns 0 on failure, including when the
	   converted path does not fit in the destination buffer. */
	if (WideCharToMultiByte(CP_OEMCP,0,file_path,-1,mutil_file_path,MAX_PATH,NULL,NULL)==0)
	{
		printf("索引路径转换失败\n");
		return -1;
	}

	/* Make sure path + suffix (+ terminator) fits before concatenating;
	   the plain strcpy/strcat pair below would otherwise overrun the buffers. */
	size_t base_len=strlen(mutil_file_path);
	if (base_len+strlen(INDEX_DB_NAME)>=(size_t)MAX_PATH
		|| base_len+strlen(NEXT_INDEX_DB_NAME)>=(size_t)MAX_PATH)
	{
		printf("索引文件路径过长\n");
		return -1;
	}

	strcpy(index_file_name,mutil_file_path);
	strcat(index_file_name,INDEX_DB_NAME);

	strcpy(next_index_file_name,mutil_file_path);
	strcat(next_index_file_name,NEXT_INDEX_DB_NAME);

	/* temp_index is handed to the helpers unchecked; how they treat a NULL
	   tree is not visible here. */
	MemoryIndexTree * temp_index=make_memory_index_tree_all_file(file_path,NULL);
	THE_NEXT_INDEX * temp_next_index=make_next_index__(temp_index);

	/* A missing secondary index is reported as a failed write instead of
	   being dereferenced. */
	int res=0;
	if (temp_next_index!=NULL)
	{
		res=write_buf_to_file(temp_next_index->next_index__,
			temp_next_index->length/* length is a byte count, not an element count */
			,next_index_file_name,"wb");
	}
	if (res<=0)
	{
		printf("二级索引写入失败\n");
	}else
	{
		printf("二级索引写入成功\n");
	}

	if (write_index_to_file(temp_index,index_file_name,"wb")>0)
	{
		printf("一级索引写入成功\n");
	}else
	{
		printf("一级索引写入失败\n");
	}
	return 0;
}
Пример #2
0
/*
 * Adds a file to the index.
 *
 * The file is inserted into the document list, which is kept in strcmp()
 * order, except for the temporary search document "._tmp_search_doc", which
 * always goes to position 0. Document ids stored in the word list are shifted
 * to account for the new entry, then the file is parsed and the index is
 * written to disk.
 *
 * Returns the (possibly relocated) index. The caller must use the returned
 * pointer from now on, because the index block may have been moved by
 * realloc(). If the file cannot be opened, is already in the filebase, or
 * memory runs out, a message is printed and the index is returned unchanged.
 */
index_p add_file(index_p index, char *file) {
    // check if file exists and can be read
    FILE *f = fopen(file, "r");
    if (!f) {
        printf("Cannot open %s!\nIndex not updated.\n", file);
        return index;
    }
    fclose(f);

    // find position in file list (alphabetically ordered)
    int doc_id = 0;

    // always insert temporary search document in the beginning
    if (strcmp(file, "._tmp_search_doc")) {
        for (doc_id = 0; doc_id < index->nr_docs; doc_id++) {
            int cmp = strcmp(index->documents[doc_id].name, file);

            if (!cmp) {
                printf("%s is already in the filebase.\n", file);
                return index;
            } else if (0 < cmp) {
                // right position in list found
                break;
            }
        }
    }

    // copy the name first, so that a failed allocation leaves the index untouched
    size_t name_size = strlen(file) + 1;
    char *name = (char *) malloc(name_size);
    if (!name) {
        printf("Out of memory!\nIndex not updated.\n");
        return index;
    }
    memcpy(name, file, name_size);

    // grow the document list by one entry; keep the old pointer until realloc succeeded
    index_p grown = (index_p) realloc(index, sizeof(index_t) + sizeof(indexed_document_t) * (index->nr_docs + 1));
    if (!grown) {
        free(name);
        printf("Out of memory!\nIndex not updated.\n");
        return index;
    }
    index = grown;

    // open a gap at doc_id and insert document in list
    memmove(&index->documents[doc_id+1], &index->documents[doc_id], sizeof(indexed_document_t) * (index->nr_docs - doc_id));

    index->documents[doc_id].name = name;
    index->documents[doc_id].nr_words = 0;
    index->nr_docs++;

    // update indices: increase indices which are greater or equal to doc_id of added document
    indexed_word_p w = index->words;
    while (w) {
        int i;
        for (i = 0; i < w->nr_docs; i++) {
            if (w->documents[i].id >= doc_id) {
                w->documents[i].id++;
            }
        }

        w = w->next;
    }

    // parse file contents and add words to index
    parse_file_for_index(index, file);
    write_index_to_file(index);
    return index;
}
Пример #3
0
/*
 * Rebuilds the word index from scratch.
 *
 * Every indexed word is discarded, the word counters are reset, and each
 * document still listed in the filebase is parsed again. The result is
 * written to disk at the end.
 */
void rebuild_index(index_p index) {
    // drop the whole word list, one node at a time; the filebase stays
    indexed_word_p cur = index->words;
    while (cur) {
        indexed_word_p following = cur->next;

        // unlink before freeing so index->words never points at freed memory
        index->words = following;
        free(cur->stem);
        free(cur);

        cur = following;
    }

    index->nr_words = 0;

    // parse each document of the filebase again
    for (int doc = 0; doc < index->nr_docs; doc++) {
        index->documents[doc].nr_words = 0;
        parse_file_for_index(index, index->documents[doc].name);
    }

    // persist the rebuilt index
    write_index_to_file(index);
}
Пример #4
0
/*
 * Removes a file from the index.
 *
 * doc_id is the position of the document in index->documents. The document
 * entry is removed from the filebase, its id is removed from every indexed
 * word, all higher document ids are decremented by one, and words that no
 * longer occur in any document are deleted. The index is then written to
 * disk.
 *
 * If the filebase is empty or doc_id is out of range, a message is printed
 * and nothing is changed.
 */
void remove_file(index_p index, int doc_id) {
    // nothing to remove from an empty filebase
    if (!index->nr_docs) {
        printf("Filebase empty!\n");
        return;
    }

    // reject ids outside the document list; going on would free and move
    // memory beyond the array
    if (doc_id < 0 || doc_id >= index->nr_docs) {
        printf("Error: illegal document id. No document removed!\n");
        return;
    }

    // remove document from list in index
    free(index->documents[doc_id].name);
    memmove(&index->documents[doc_id], &index->documents[doc_id+1], sizeof(indexed_document_t) * (index->nr_docs - 1 - doc_id));
    index->nr_docs--;

    indexed_word_p w = index->words;    // current word
    indexed_word_p p = NULL;            // previous word

    // remove document from the list of each indexed word
    while (w) {
        // find index of removed document in list (or of first document with higher id)
        int i;
        int remove = 0;
        for (i = 0; i < w->nr_docs; i++) {
            if (w->documents[i].id == doc_id) {
                w->nr_docs--;
                // document found in list, indicate removal
                remove = 1;
                break;
            } else if (w->documents[i].id > doc_id) {
                break;
            }
        }

        // reduce document id of all documents with id > removed document id
        // and shift array items (in order to remove entry of the document we want to remove) if necessary;
        // nr_docs was already decremented on removal, so i+remove stays inside the old array
        for (; i < w->nr_docs; i++) {
            w->documents[i] = w->documents[i+remove];
            w->documents[i].id--;
        }

        if (w->nr_docs == 0) {
            // only occurrence of this word is in removed document -> remove word from index
            if (!p) {
                index->words = w->next;
            } else {
                p->next = w->next;
            }

            index->nr_words--;

            // p is left unchanged: it is still the predecessor of the next word
            indexed_word_p n = w->next;
            free(w->stem);
            free(w);
            w = n;
        } else {
            // get next indexed word
            p = w;
            w = w->next;
        }
    }

    // commit changes to file
    write_index_to_file(index);
}