void GenerateInvertedIndexFile()
{
    FILE *mergedIndex = fopen(Tempmergedindex, "r");
    RawPost_item posting;
    char invertedWord[MAX_WORD_LENGTH];
    uint32_t invertedDocID;
    uint32_t invertedPost;
    InvertedTable invertedTable;
    lex_table lextable;
    string lastWord = "";
    uint32_t lastCounter = 0;
    uint32_t count = 0;
    while(fscanf(mergedIndex, "%d %s %d\n", &invertedDocID, invertedWord, &invertedPost) != EOF) {
        posting.docID = invertedDocID;
        posting.word=invertedWord;
        posting.pos = invertedPost;

        // first time
        if(lastWord == "") {
            lastWord = posting.word;
            lex_item *lexiconitem = new lex_item;
            lexiconitem->word = posting.word;
            lexiconitem->invertedPointer = 0;
            lextable.push_back(lexiconitem);
        }

        uint32_t count = invertedTable.Insert_table(&posting);
        if(count > 0) {   // count > 0 means meet new word and there is data written
        	lex_item *lexiconitem = lextable.back();
            lexiconitem->fileID = invertedTable.GetFileID();
            lexiconitem->num = invertedTable.GetDocNumLastWord();
            if(count < lastCounter)  lexiconitem->invertedPointer = 0;
            lexiconitem->size = count - lexiconitem->invertedPointer;

            lexiconitem = new lex_item;
            lexiconitem->word = posting.word;
            lastWord = posting.word;
            lexiconitem->invertedPointer = count;
            lextable.push_back(lexiconitem);
            lastCounter = count;
        }
    }

    count = invertedTable.WriteOutstanding();
    lextable.back()->fileID = invertedTable.GetFileID();
    lextable.back()->num = invertedTable.GetDocNumLastWord();
    if(count > 0 && count < lastCounter)  lextable.back()->invertedPointer = 0;
    lextable.back()->size = count - lextable.back()->invertedPointer;

    Writelextable(&lextable, CURRENT_FILEMODE);
}
Beispiel #2
0
void GenerateInvertedIndexFile()
{
    //convert it into inverted index
    FILE *mergedIndex = fopen(TMP_INDEX_PATH, "r");
    RawPosting posting;
    char invertedWord[MAX_WORD_LENGTH];
    uint32_t invertedContext;
    uint32_t invertedDocID;
    uint32_t invertedPost;
    InvertedTable invertedTable;
    LexiconTable lexiconTable;
    string lastWord = "";
    uint32_t lastCounter = 0;
    uint32_t count = 0;
    while(fscanf(mergedIndex, "%d %s %d %d\n", &invertedDocID, invertedWord, &invertedPost, &invertedContext) != EOF) {
        if(strlen(invertedWord) > MAX_WORD_LENGTH) {
            cout<<invertedWord<<endl;
            cout<<strlen(invertedWord)<<endl;
            exit(1);
        }
        posting.docID = invertedDocID;
        posting.word=invertedWord;
        bzero(invertedWord, MAX_WORD_LENGTH);
        posting.context = invertedContext;
        posting.pos = invertedPost;
        
        // first time
        if(lastWord == "") {
            lastWord = posting.word;
            LexiconItem *lexiconItem = new LexiconItem;
            lexiconItem->word = posting.word;
            lexiconItem->invertedPointer = 0;
            lexiconTable.push_back(lexiconItem);
        }
        
        count = invertedTable.Insert(&posting);
        
        // count > 0 means meet new word and there is data written
        if(count > 0) {
            LexiconItem *lexiconItem = lexiconTable.back();
            lexiconItem->fileID = invertedTable.GetFileID();
            lexiconItem->num = invertedTable.GetDocNumLastWord();
            if(count < lastCounter) {
                lexiconItem->invertedPointer = 0;
            }
            
            lexiconItem = new LexiconItem;
            lexiconItem->word = posting.word;
            lastWord = posting.word;
            lexiconItem->invertedPointer = count;
            lexiconTable.push_back(lexiconItem);
            lastCounter = count;
        }
        
    }
    
    count = invertedTable.WriteOutstanding();
    lexiconTable.back()->fileID = invertedTable.GetFileID();
    lexiconTable.back()->num = invertedTable.GetDocNumLastWord();
    if(count > 0 && count < lastCounter) {
        lexiconTable.back()->invertedPointer = 0;
    }
    
    WriteLexiconTable(&lexiconTable, CURRENT_FILEMODE);
}