void GenerateInvertedIndexFile() { FILE *mergedIndex = fopen(Tempmergedindex, "r"); RawPost_item posting; char invertedWord[MAX_WORD_LENGTH]; uint32_t invertedDocID; uint32_t invertedPost; InvertedTable invertedTable; lex_table lextable; string lastWord = ""; uint32_t lastCounter = 0; uint32_t count = 0; while(fscanf(mergedIndex, "%d %s %d\n", &invertedDocID, invertedWord, &invertedPost) != EOF) { posting.docID = invertedDocID; posting.word=invertedWord; posting.pos = invertedPost; // first time if(lastWord == "") { lastWord = posting.word; lex_item *lexiconitem = new lex_item; lexiconitem->word = posting.word; lexiconitem->invertedPointer = 0; lextable.push_back(lexiconitem); } uint32_t count = invertedTable.Insert_table(&posting); if(count > 0) { // count > 0 means meet new word and there is data written lex_item *lexiconitem = lextable.back(); lexiconitem->fileID = invertedTable.GetFileID(); lexiconitem->num = invertedTable.GetDocNumLastWord(); if(count < lastCounter) lexiconitem->invertedPointer = 0; lexiconitem->size = count - lexiconitem->invertedPointer; lexiconitem = new lex_item; lexiconitem->word = posting.word; lastWord = posting.word; lexiconitem->invertedPointer = count; lextable.push_back(lexiconitem); lastCounter = count; } } count = invertedTable.WriteOutstanding(); lextable.back()->fileID = invertedTable.GetFileID(); lextable.back()->num = invertedTable.GetDocNumLastWord(); if(count > 0 && count < lastCounter) lextable.back()->invertedPointer = 0; lextable.back()->size = count - lextable.back()->invertedPointer; Writelextable(&lextable, CURRENT_FILEMODE); }
void GenerateInvertedIndexFile() { //convert it into inverted index FILE *mergedIndex = fopen(TMP_INDEX_PATH, "r"); RawPosting posting; char invertedWord[MAX_WORD_LENGTH]; uint32_t invertedContext; uint32_t invertedDocID; uint32_t invertedPost; InvertedTable invertedTable; LexiconTable lexiconTable; string lastWord = ""; uint32_t lastCounter = 0; uint32_t count = 0; while(fscanf(mergedIndex, "%d %s %d %d\n", &invertedDocID, invertedWord, &invertedPost, &invertedContext) != EOF) { if(strlen(invertedWord) > MAX_WORD_LENGTH) { cout<<invertedWord<<endl; cout<<strlen(invertedWord)<<endl; exit(1); } posting.docID = invertedDocID; posting.word=invertedWord; bzero(invertedWord, MAX_WORD_LENGTH); posting.context = invertedContext; posting.pos = invertedPost; // first time if(lastWord == "") { lastWord = posting.word; LexiconItem *lexiconItem = new LexiconItem; lexiconItem->word = posting.word; lexiconItem->invertedPointer = 0; lexiconTable.push_back(lexiconItem); } count = invertedTable.Insert(&posting); // count > 0 means meet new word and there is data written if(count > 0) { LexiconItem *lexiconItem = lexiconTable.back(); lexiconItem->fileID = invertedTable.GetFileID(); lexiconItem->num = invertedTable.GetDocNumLastWord(); if(count < lastCounter) { lexiconItem->invertedPointer = 0; } lexiconItem = new LexiconItem; lexiconItem->word = posting.word; lastWord = posting.word; lexiconItem->invertedPointer = count; lexiconTable.push_back(lexiconItem); lastCounter = count; } } count = invertedTable.WriteOutstanding(); lexiconTable.back()->fileID = invertedTable.GetFileID(); lexiconTable.back()->num = invertedTable.GetDocNumLastWord(); if(count > 0 && count < lastCounter) { lexiconTable.back()->invertedPointer = 0; } WriteLexiconTable(&lexiconTable, CURRENT_FILEMODE); }