Exemplo n.º 1
0
void test3()
{
    addRecords();
    
    /// Test the Trie

    SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN);
    syn->init();

    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, "");

    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    string INDEX_DIR = ".";
    IndexMetaData *indexMetaData = new IndexMetaData( GlobalCache::create(1000,1000),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		INDEX_DIR);
    
    Indexer *indexer = Indexer::load(indexMetaData);

    //index->print_Index();

    Record *record = new Record(indexer->getSchema());
    record->setPrimaryKey(1999);
    record->setSearchableAttributeValue(0, "steve jobs");
    record->setSearchableAttributeValue(1, "stanford speech");
    record->setRecordBoost(90);
    indexer->addRecord(record, analyzer);
    indexer->merge_ForTesting();

/*    // create an index searcher
    IndexSearcher *indexSearcher = IndexSearcher::create(indexer);
    Analyzer *analyzer = indexer->getAnalyzer();

    indexer->print_Index();

    ASSERT ( ping(analyzer, indexSearcher, "tom" , 1 , 1001) == true);
    ASSERT ( ping(analyzer, indexSearcher, "jimi" , 1 , 1008) == true);
    ASSERT ( ping(analyzer, indexSearcher, "smith" , 1 , 1001) == true);
    ASSERT ( ping(analyzer, indexSearcher, "jobs" , 1 , 1999) == true);

    (void)analyzer;
    delete indexSearcher;*/

    delete indexer;
    delete analyzer;
}
Exemplo n.º 2
0
// Read data from file, build the index, and save the index to disk
void buildIndex(string data_file, string index_dir)
{
    /// Set up the Schema
    Schema *schema = Schema::create(srch2is::DefaultIndex);
    schema->setPrimaryKey("primaryKey");
    schema->setSearchableAttribute("description", 2);
    schema->setScoringExpression("idf_score*doc_boost");

    /// Create an Analyzer
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "",
                                      srch2is::STANDARD_ANALYZER);

    /// Create an index writer
    unsigned mergeEveryNSeconds = 2;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites, index_dir);
    Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

    Record *record = new Record(schema);

    unsigned docsCounter = 0;
    string line;

    ifstream data(data_file.c_str());

    /// Read records from file
    /// the file should have two fields, seperated by '^'
    /// the first field is the primary key, the second field is a searchable attribute
    while(getline(data,line))
    {
        unsigned cellCounter = 0;
        stringstream  lineStream(line);
        string cell;

        while(getline(lineStream,cell,'^') && cellCounter < 3 )
        {
            if (cellCounter == 0)
            {
                record->setPrimaryKey(cell.c_str());
            }
            else if (cellCounter == 1)
            {
                record->setSearchableAttributeValue(0, cell);
            }
            else
            {
                float recordBoost = atof(cell.c_str());
                record->setRecordBoost(recordBoost);
            }

            cellCounter++;
        }

        indexer->addRecord(record, analyzer);

        docsCounter++;

        record->clear();
    }

    cout << "#Docs Read:" << docsCounter << endl;

    indexer->commit();
    indexer->save();

    cout << "Index saved." << endl;

    data.close();

    delete indexer;
    delete indexMetaData;
    delete analyzer;
    delete schema;
}
Exemplo n.º 3
0
void test1()
{
    Schema *schema = Schema::create(srch2::instantsearch::DefaultIndex);

    schema->setPrimaryKey("article_id"); // integer, not searchable

    schema->setSearchableAttribute("article_id"); // convert id to searchable text
    schema->setSearchableAttribute("article_authors", 2); // searchable text
    schema->setSearchableAttribute("article_title", 7); // searchable text

    // create an analyzer
    SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN);
    syn->init();
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, "");
    
    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    string INDEX_DIR = "test";
    IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		INDEX_DIR);
    
    Indexer *index = Indexer::create(indexMetaData, analyzer, schema);
    Record *record = new Record(schema);
    char* authorsCharStar = new char[30];
    char* titleCharStar = new char[30];

    //generate random characers
    srand ( time(NULL) );
    // create a record of 3 attributes
    for (unsigned i = 0; i < 1000; i++)
    {
        record->setPrimaryKey(i + 1000);

        sprintf(authorsCharStar,"John %cLen%cnon",(rand() % 50)+65,(rand() % 10)+65);
        string authors = string(authorsCharStar);
        record->setSearchableAttributeValue("article_authors", authors);

        sprintf(titleCharStar,"Yesterday %cOnc%ce %cMore",
                (rand()%59)+65, (rand()%59)+65, (rand()%10)+65);
        string title = string(titleCharStar);
        record->setSearchableAttributeValue("article_title", title);

        record->setRecordBoost(rand() % 100);
        index->addRecord(record, analyzer);

        // for creating another record
        record->clear();
    }

    // build the index
    index->commit();

    //indexer->printNumberOfBytes();

    delete[] authorsCharStar;
    delete[] titleCharStar;
    delete record;
    delete index;
    delete analyzer;
    delete schema;
}
Exemplo n.º 4
0
void addRecords()
{
    ///Create Schema
    Schema *schema = Schema::create(srch2::instantsearch::DefaultIndex);
    schema->setPrimaryKey("article_id"); // integer, not searchable
    schema->setSearchableAttribute("article_id"); // convert id to searchable text
    schema->setSearchableAttribute("article_authors", 2); // searchable text
    schema->setSearchableAttribute("article_title", 7); // searchable text
    
    SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN);
    syn->init();
    Record *record = new Record(schema);
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, "");

    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    string INDEX_DIR = ".";
    IndexMetaData *indexMetaData = new IndexMetaData( NULL,
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		INDEX_DIR);
    Indexer *index = Indexer::create(indexMetaData, analyzer, schema);
    
    record->setPrimaryKey(1001);
    record->setSearchableAttributeValue("article_authors", "Tom Smith and Jack Lennon");
    record->setSearchableAttributeValue("article_title", "come Yesterday Once More");
    record->setRecordBoost(10);
    index->addRecord(record, analyzer);

    record->clear();
    record->setPrimaryKey(1008);
    record->setSearchableAttributeValue(0, "Jimi Hendrix");
    record->setSearchableAttributeValue(1, "Little wing");
    record->setRecordBoost(90);
    index->addRecord(record, analyzer);

    index->commit();
    //index->commit();
    //index->print_Index();

    std::cout << "print 1 $$$$$$$$$$$$$$" << std::endl;

    record->clear();
    record->setPrimaryKey(1007);
    record->setSearchableAttributeValue(0, "Jimaai Hendaarix");
    record->setSearchableAttributeValue(1, "Littaale waaing");
    record->setRecordBoost(90);
    index->addRecord(record, analyzer);

    //index->print_Index();

    std::cout << "print 2 $$$$$$$$$$$$$$" << std::endl;

    delete schema;
    delete record;
    delete analyzer;
    delete index;
    syn->free();
}
Indexer *buildIndex(string data_file, string index_dir, string expression)
{
    /// Set up the Schema
    Schema *schema = Schema::create(srch2is::DefaultIndex, srch2::instantsearch::POSITION_INDEX_FIELDBIT);
    schema->setPrimaryKey("id");
    schema->setSearchableAttribute("name", 2);
    schema->setSearchableAttribute("category", 1);
    schema->setScoringExpression(expression);

    /// Create an Analyzer
    SynonymContainer *syn = SynonymContainer::getInstance(string(""), SYNONYM_DONOT_KEEP_ORIGIN);
    ProtectedWordsContainer *prot = ProtectedWordsContainer::getInstance("");
    AnalyzerInternal *simpleAnlyzer = new StandardAnalyzer(NULL, NULL, prot, syn, string(""));

    Analyzer *analyzer = new Analyzer(NULL, NULL, prot, syn, "", srch2is::STANDARD_ANALYZER);

    /// Create an index writer
    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		index_dir);
    Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

    Record *record = new Record(schema);

    unsigned docsCounter = 0;
    string line;

    ifstream data(data_file.c_str());

    /// Read records from file
    /// the file should have two fields, seperated by '^'
    /// the first field is the primary key, the second field is a searchable attribute
    while(getline(data,line))
    {
        unsigned cellCounter = 0;
        stringstream  lineStream(line);
        string cell;

        while(getline(lineStream,cell,'^') && cellCounter < 4 )
        {
            if (cellCounter == 0)
            {
                record->setPrimaryKey(cell.c_str());
            }
            else if (cellCounter == 1)
            {
                record->setSearchableAttributeValue(0, cell);
            }
            else if (cellCounter == 2)
            {
                record->setSearchableAttributeValue(1, cell);
            }
            else if (cellCounter == 3)
            {
                record->setRecordBoost(atof(cell.c_str()));
            }

            cellCounter++;
        }

        indexer->addRecord(record, analyzer);

        docsCounter++;

        record->clear();
    }

    cout << "#Docs Read:" << docsCounter << endl;

    indexer->commit();

    data.close();

    delete record;
    delete analyzer;
    delete schema;
    prot->free();
    syn->free();

    return indexer;
}