// Populate the two directories used by the merge tests:
// `dir` gets 1000 docs in a single segment, `aux` gets 30 docs
// spread across 3 non-compound segments.
void setUpDirs(CuTest *tc, Directory * dir, Directory * aux) {

    WhitespaceAnalyzer analyzer;

    // Main index: 1000 documents buffered into one segment.
    IndexWriter4Test * w = newWriter(dir, &analyzer, true);
    w->setMaxBufferedDocs(1000);
    addDocs(w, 1000);
    assertEquals(1000, w->docCount());
    assertEquals(1, w->getSegmentCount());
    w->close();
    _CLLDELETE(w);

    // Auxiliary index: 30 documents in 3 segments, no compound files.
    w = newWriter(aux, &analyzer, true);
    w->setUseCompoundFile(false); // use one without a compound file
    w->setMaxBufferedDocs(100);
    w->setMergeFactor(10);
    for (int i = 0; i < 3; i++) {
        addDocs(w, 10);
        // Close and reopen in append mode so each batch of 10
        // docs is flushed into its own segment.
        w->close();
        _CLLDELETE(w);
        w = newWriter(aux, &analyzer, false);
        w->setUseCompoundFile(false); // use one without a compound file
        w->setMaxBufferedDocs(100);
        w->setMergeFactor(10);
    }
    assertEquals(30, w->docCount());
    assertEquals(3, w->getSegmentCount());
    w->close();
    _CLLDELETE(w);
}
// case 0: add self or exceed maxMergeDocs, expect exception
void testAddSelf(CuTest * tc)  {

    Directory * dir = _CLNEW RAMDirectory();    // main directory
    Directory * aux = _CLNEW RAMDirectory();    // auxiliary directory

    WhitespaceAnalyzer analyzer;

    // Seed the main index with 100 documents.
    IndexWriter4Test * w = newWriter(dir, &analyzer, true);
    addDocs(w, 100);
    assertEquals(100, w->docCount());
    w->close();
    _CLLDELETE(w);

    // Build the auxiliary index in two passes (40 + 100 docs),
    // each pass recreating the index without compound files.
    w = newWriter(aux, &analyzer, true);
    w->setUseCompoundFile(false); // use one without a compound file
    w->setMaxBufferedDocs(1000);
    addDocs(w, 40);
    w->close();
    _CLLDELETE(w);

    w = newWriter(aux, &analyzer, true);
    w->setUseCompoundFile(false); // use one without a compound file
    w->setMaxBufferedDocs(1000);
    addDocs(w, 100);
    w->close();
    _CLLDELETE(w);

    // Adding an index's own directory to itself must throw and
    // leave the document count untouched.
    w = newWriter(dir, &analyzer, false);
    try {
      ValueArray<Directory*> dirs(2);
      dirs[0] = aux;
      dirs[1] = dir;   // cannot add self
      w->addIndexesNoOptimize( dirs );
      assertTrue(false);
    }
    catch (CLuceneError&) {
      assertEquals(100, w->docCount());
    }
    w->close();
    _CLLDELETE(w);

    // make sure the index is correct
    verifyNumDocs(tc, dir, 100);

    dir->close();
    _CLLDELETE(dir);

    aux->close();
    _CLLDELETE(aux);
}
// case 2: tail segments, invariants hold, no copy
void testNoCopySegments(CuTest * tc)  {

    Directory * dir = _CLNEW RAMDirectory();    // main directory
    Directory * aux = _CLNEW RAMDirectory();    // auxiliary directory

    WhitespaceAnalyzer  an;

    // dir: 1000 docs / 1 segment, aux: 30 docs / 3 segments.
    setUpDirs(tc, dir, aux);

    IndexWriter4Test * w = newWriter(dir, &an, false);
    w->setMaxBufferedDocs(9);
    w->setMergeFactor(4);
    addDocs(w, 2);

    // Merge in the auxiliary index; the 1000-doc segment must be
    // kept as-is (no copy) while the tail segments absorb the rest.
    ValueArray<Directory*> dirs(1);
    dirs[0] = aux;
    w->addIndexesNoOptimize(dirs);

    assertEquals(1032, w->docCount());
    assertEquals(2, w->getSegmentCount());
    assertEquals(1000, w->getDocCount(0));
    w->close();
    _CLLDELETE(w);

    // make sure the index is correct
    verifyNumDocs(tc, dir, 1032);

    dir->close();
    _CLLDELETE(dir);

    aux->close();
    _CLLDELETE(aux);
}
// Basic addIndexesNoOptimize scenario: merge several auxiliary
// RAMDirectory indexes into a main one, verifying doc counts and
// term-document counts at every step.
void testSimpleCase(CuTest *tc) {

    Directory * dir  = _CLNEW RAMDirectory();   // main directory
    Directory * aux  = _CLNEW RAMDirectory();   // auxiliary directory
    Directory * aux2 = _CLNEW RAMDirectory();   // second auxiliary directory

    WhitespaceAnalyzer analyzer;

    // Main index starts with 100 documents.
    IndexWriter4Test * w = newWriter(dir, &analyzer, true);
    addDocs(w, 100);
    assertEquals(100, w->docCount());
    w->close();
    _CLLDELETE(w);

    // aux: 40 documents in separate (non-compound) files.
    w = newWriter(aux, &analyzer, true);
    w->setUseCompoundFile(false); // use one without a compound file
    addDocs(w, 40);
    assertEquals(40, w->docCount());
    w->close();
    _CLLDELETE(w);

    // aux2: 50 documents in compound files.
    w = newWriter(aux2, &analyzer, true);
    addDocs2(w, 50);
    assertEquals(50, w->docCount());
    w->close();
    _CLLDELETE(w);

    // Merge both auxiliary indexes into the main one; doc count is
    // checked before and after the merge.
    w = newWriter(dir, &analyzer, false);
    assertEquals(100, w->docCount());
    {
        ValueArray<Directory*> dirs(2);
        dirs[0] = aux;
        dirs[1] = aux2;
        w->addIndexesNoOptimize( dirs );
    }
    assertEquals(190, w->docCount());
    w->close();
    _CLLDELETE(w);

    // The source index must be untouched, the target index complete.
    verifyNumDocs(tc, aux, 40);
    verifyNumDocs(tc, dir, 190);

    // Add a third auxiliary set of 40 documents.
    Directory * aux3 = _CLNEW RAMDirectory();
    w = newWriter(aux3, &analyzer, true);
    addDocs(w, 40);
    assertEquals(40, w->docCount());
    w->close();
    _CLLDELETE(w);

    // Merge it in without optimizing.
    w = newWriter(dir, &analyzer, false);
    assertEquals(190, w->docCount());
    {
        ValueArray<Directory*> dirs(1);
        dirs[0] = aux3;
        w->addIndexesNoOptimize( dirs );
    }
    assertEquals(230, w->docCount());
    w->close();
    _CLLDELETE(w);

    verifyNumDocs(tc, dir, 230);

    Term t1(_T("content"), _T("aaa"));
    Term t2(_T("content"), _T("bbb"));

    verifyTermDocs(tc, dir, &t1, 180);
    verifyTermDocs(tc, dir, &t2, 50);

    // Optimizing must not change any of the counts.
    w = newWriter(dir, &analyzer, false);
    w->optimize();
    w->close();
    _CLLDELETE(w);

    verifyNumDocs(tc, dir, 230);
    verifyTermDocs(tc, dir, &t1, 180);
    verifyTermDocs(tc, dir, &t2, 50);

    // Finally merge in a single-document index.
    Directory * aux4 = _CLNEW RAMDirectory();
    w = newWriter(aux4, &analyzer, true);
    addDocs2(w, 1);
    w->close();
    _CLLDELETE(w);

    w = newWriter(dir, &analyzer, false);
    assertEquals(230, w->docCount());
    {
        ValueArray<Directory*> dirs(1);
        dirs[0] = aux4;
        w->addIndexesNoOptimize( dirs );
    }
    assertEquals(231, w->docCount());
    w->close();
    _CLLDELETE(w);

    verifyNumDocs(tc, dir, 231);
    verifyTermDocs(tc, dir, &t2, 51);

    dir->close();
    _CLLDELETE(dir);
    aux->close();
    _CLLDELETE(aux);
    aux2->close();
    _CLLDELETE(aux2);
    aux3->close();
    _CLLDELETE(aux3);
    aux4->close();
    _CLLDELETE(aux4);
}
Esempio n. 5
0
/*
 * Insert `word` into the inverted-index hash table, recording `fileName`
 * in the word's document list.
 *
 * Returns:
 *   0 - word already present; the document was appended to it
 *   1 - new word node created and inserted
 *   3 - allocation failure (table unchanged)
 *
 * Fixes over the previous version:
 *   - `node` was dereferenced (node->word = ..., strcpy) BEFORE its NULL
 *     check, so an OOM crashed instead of returning 3; the check now
 *     comes first and node->word's allocation is checked too.
 *   - a freshly allocated node leaked (and 0 was wrongly returned) when
 *     neither insertion branch fired; insertion is now unconditional
 *     once hashLookUp reports the word is absent.
 */
int addToHash(char *word, char *fileName, HashTable *Index){
    // Hash the word into a table slot.
    unsigned long key = JenkinsHash(word, MAX_HASH_SLOT);

    // Word already indexed: walk the collision chain, attach the
    // document to the matching node, and report "existing word".
    if(hashLookUp(word, Index) == 0){
        for(wordNode *tmp = Index->table[key]; tmp != NULL; tmp = tmp->next){
            if(strcmp(tmp->word, word) == 0){
                addDocs(tmp, fileName, Index);
                break;
            }
        }
        return 0;
    }

    // New word: build its node, checking every allocation before use.
    wordNode *node = malloc(sizeof *node);
    if(node == NULL){
        return 3;
    }
    node->word = malloc(strlen(word) + 1);
    if(node->word == NULL){
        free(node);   // don't leak the half-built node
        return 3;
    }
    strcpy(node->word, word);
    node->next = NULL;
    node->doc = NULL;

    // Link the node in: either as the slot head or at the chain tail.
    if(Index->table[key] == NULL){
        Index->table[key] = node;
    } else {
        wordNode *tail = Index->table[key];
        while(tail->next != NULL){
            tail = tail->next;
        }
        tail->next = node;
    }
    addDocs(node, fileName, Index);
    return 1;
}