// Prepare the two fixture directories: `dir` receives 1000 docs flushed as a
// single segment; `aux` receives 30 docs spread over 3 non-compound segments.
void setUpDirs(CuTest *tc, Directory * dir, Directory * aux) {
    WhitespaceAnalyzer an;

    // dir: 1000 documents buffered so they land in exactly one segment
    IndexWriter4Test * w = newWriter(dir, &an, true);
    w->setMaxBufferedDocs(1000);
    addDocs(w, 1000);
    assertEquals(1000, w->docCount());
    assertEquals(1, w->getSegmentCount());
    w->close();
    _CLLDELETE(w);

    // aux: 30 documents in 3 segments, without compound files
    w = newWriter(aux, &an, true);
    w->setUseCompoundFile(false); // use one without a compound file
    w->setMaxBufferedDocs(100);
    w->setMergeFactor(10);
    for (int i = 0; i < 3; i++) {
        addDocs(w, 10);
        // reopen (append mode) so each batch of 10 becomes its own segment
        w->close();
        _CLLDELETE(w);
        w = newWriter(aux, &an, false);
        w->setUseCompoundFile(false); // use one without a compound file
        w->setMaxBufferedDocs(100);
        w->setMergeFactor(10);
    }
    assertEquals(30, w->docCount());
    assertEquals(3, w->getSegmentCount());
    w->close();
    _CLLDELETE(w);
}
// case 0: add self or exceed maxMergeDocs, expect exception void testAddSelf(CuTest * tc) { // main directory Directory * dir = _CLNEW RAMDirectory(); // auxiliary directory Directory * aux = _CLNEW RAMDirectory(); IndexWriter4Test * writer = NULL; WhitespaceAnalyzer analyzer; writer = newWriter(dir, &analyzer, true); // add 100 documents addDocs(writer, 100); assertEquals(100, writer->docCount()); writer->close(); _CLLDELETE(writer); writer = newWriter(aux, &analyzer, true); writer->setUseCompoundFile(false); // use one without a compound file writer->setMaxBufferedDocs(1000); // add 140 documents in separate files addDocs(writer, 40); writer->close(); _CLLDELETE(writer); writer = newWriter(aux, &analyzer, true); writer->setUseCompoundFile(false); // use one without a compound file writer->setMaxBufferedDocs(1000); addDocs(writer, 100); writer->close(); _CLLDELETE(writer); writer = newWriter(dir, &analyzer, false); try { // cannot add self ValueArray<Directory*> dirs(2); dirs[0] = aux; dirs[1] = dir; writer->addIndexesNoOptimize( dirs ); assertTrue(false); } catch (CLuceneError&) { assertEquals(100, writer->docCount()); } writer->close(); _CLLDELETE(writer); // make sure the index is correct verifyNumDocs(tc, dir, 100); dir->close(); _CLLDELETE(dir); aux->close(); _CLLDELETE(aux); }
// case 2: tail segments, invariants hold, no copy void testNoCopySegments(CuTest * tc) { // main directory Directory * dir = _CLNEW RAMDirectory(); // auxiliary directory Directory * aux = _CLNEW RAMDirectory(); WhitespaceAnalyzer an; setUpDirs(tc, dir, aux); IndexWriter4Test * writer = newWriter(dir, &an, false); writer->setMaxBufferedDocs(9); writer->setMergeFactor(4); addDocs(writer, 2); ValueArray<Directory*> dirs(1); dirs[0] = aux; writer->addIndexesNoOptimize(dirs); assertEquals(1032, writer->docCount()); assertEquals(2, writer->getSegmentCount()); assertEquals(1000, writer->getDocCount(0)); writer->close(); _CLLDELETE(writer); // make sure the index is correct verifyNumDocs(tc, dir, 1032); dir->close(); _CLLDELETE(dir); aux->close(); _CLLDELETE(aux); }
// End-to-end check of addIndexesNoOptimize: merge two auxiliary indexes into
// the main one, add a third, optimize, then append a single document via a
// fourth auxiliary index — verifying doc and term counts at every step.
void testSimpleCase(CuTest *tc) {
    Directory * dir  = _CLNEW RAMDirectory();  // main directory
    Directory * aux  = _CLNEW RAMDirectory();  // first auxiliary
    Directory * aux2 = _CLNEW RAMDirectory();  // second auxiliary
    WhitespaceAnalyzer an;

    // main index: 100 documents
    IndexWriter4Test * w = newWriter(dir, &an, true);
    addDocs(w, 100);
    assertEquals(100, w->docCount());
    w->close();
    _CLLDELETE(w);

    // aux: 40 documents in separate files (no compound file)
    w = newWriter(aux, &an, true);
    w->setUseCompoundFile(false);
    addDocs(w, 40);
    assertEquals(40, w->docCount());
    w->close();
    _CLLDELETE(w);

    // aux2: 50 documents in compound files
    w = newWriter(aux2, &an, true);
    addDocs2(w, 50);
    assertEquals(50, w->docCount());
    w->close();
    _CLLDELETE(w);

    // merge aux + aux2 into main; check count before and after
    w = newWriter(dir, &an, false);
    assertEquals(100, w->docCount());
    {
        ValueArray<Directory*> dirs(2);
        dirs[0] = aux;
        dirs[1] = aux2;
        w->addIndexesNoOptimize( dirs );
    }
    assertEquals(190, w->docCount());
    w->close();
    _CLLDELETE(w);

    verifyNumDocs(tc, aux, 40);    // source index is untouched
    verifyNumDocs(tc, dir, 190);   // merged index is correct

    // now add another set in
    Directory * aux3 = _CLNEW RAMDirectory();
    w = newWriter(aux3, &an, true);
    addDocs(w, 40);
    assertEquals(40, w->docCount());
    w->close();
    _CLLDELETE(w);

    // doc count before segments are merged / index is optimized
    w = newWriter(dir, &an, false);
    assertEquals(190, w->docCount());
    {
        ValueArray<Directory*> dirs(1);
        dirs[0] = aux3;
        w->addIndexesNoOptimize( dirs );
    }
    assertEquals(230, w->docCount());
    w->close();
    _CLLDELETE(w);

    verifyNumDocs(tc, dir, 230);
    Term t1(_T("content"), _T("aaa"));
    Term t2(_T("content"), _T("bbb"));
    verifyTermDocs(tc, dir, &t1, 180);
    verifyTermDocs(tc, dir, &t2, 50);

    // optimize and re-verify nothing was lost
    w = newWriter(dir, &an, false);
    w->optimize();
    w->close();
    _CLLDELETE(w);
    verifyNumDocs(tc, dir, 230);
    verifyTermDocs(tc, dir, &t1, 180);
    verifyTermDocs(tc, dir, &t2, 50);

    // append a single document via a fourth auxiliary index
    Directory * aux4 = _CLNEW RAMDirectory();
    w = newWriter(aux4, &an, true);
    addDocs2(w, 1);
    w->close();
    _CLLDELETE(w);

    w = newWriter(dir, &an, false);
    assertEquals(230, w->docCount());
    {
        ValueArray<Directory*> dirs(1);
        dirs[0] = aux4;
        w->addIndexesNoOptimize( dirs );
    }
    assertEquals(231, w->docCount());
    w->close();
    _CLLDELETE(w);

    verifyNumDocs(tc, dir, 231);
    verifyTermDocs(tc, dir, &t2, 51);

    dir->close();
    _CLLDELETE(dir);
    aux->close();
    _CLLDELETE(aux);
    aux2->close();
    _CLLDELETE(aux2);
    aux3->close();
    _CLLDELETE(aux3);
    aux4->close();
    _CLLDELETE(aux4);
}
/*
 * Insert `word` into the hash index, or record another occurrence of an
 * existing word for document `fileName`.
 *
 * Returns: 0 if the word was already present (occurrence recorded),
 *          1 if a new node was inserted,
 *          3 on allocation failure (index unchanged).
 *
 * Fixes over the previous version:
 *  - `node` was dereferenced (node->word = ..., strcpy, ...) BEFORE the
 *    if(!node) null check, which is undefined behavior on OOM; the check
 *    now happens immediately after malloc.
 *  - the malloc for node->word was never checked; on failure the node is
 *    freed and 3 is returned instead of crashing in strcpy.
 *  - the fall-through path at the end leaked the freshly allocated node
 *    and misreported 0; the insertion logic now always links the node.
 */
int addToHash(char *word, char *fileName, HashTable *Index){
    /* hash the word to its bucket */
    unsigned long key = JenkinsHash(word, MAX_HASH_SLOT);

    /* word already in the table: find its node in the chain, add the doc */
    if(hashLookUp(word, Index) == 0){
        for(wordNode *tmp = Index->table[key]; tmp != NULL; tmp = tmp->next){
            if(strcmp(tmp->word, word) == 0){
                addDocs(tmp, fileName, Index);
                return 0;
            }
        }
        return 0;  /* lookup said present but chain mismatch; nothing to do */
    }

    /* new word: allocate the node, checking BEFORE any dereference */
    wordNode *node = malloc(sizeof *node);
    if(node == NULL){
        return 3;
    }
    node->word = malloc(strlen(word) + 1);
    if(node->word == NULL){
        free(node);   /* don't leak the half-built node */
        return 3;
    }
    strcpy(node->word, word);
    node->next = NULL;
    node->doc = NULL;

    /* link into the bucket: either as head or at the tail of the chain */
    if(Index->table[key] == NULL){
        Index->table[key] = node;
    } else {
        wordNode *tail = Index->table[key];
        while(tail->next != NULL){
            tail = tail->next;
        }
        tail->next = node;
    }
    addDocs(node, fileName, Index);
    return 1;
}