Beispiel #1
0
void Serialize(string directoryName)
{
	// Create a schema
	Schema *schema = Schema::create(LocationIndex);
	schema->setPrimaryKey("list_id"); // integer, by default not searchable
	schema->setSearchableAttribute("title", 2); // searchable text
	schema->setSearchableAttribute("address", 7); // searchable text

    // Create an analyzer
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "");

    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
	CacheManager *cache = new CacheManager(134217728);
    IndexMetaData *indexMetaData = new IndexMetaData(cache,
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		directoryName);

	Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

	readRecordsFromFile(indexer, schema, analyzer, directoryName+"/quadtree/1K");

    boost::shared_ptr<QuadTreeRootNodeAndFreeLists> quadtree_ReadView;
    quadtree_ReadView = dynamic_cast<IndexReaderWriter *>(indexer)->getQuadTree_ReadView();
    QuadTreeNode *qt = quadtree_ReadView->root;

	// serialize the index
	indexer->commit();
	indexer->save(directoryName);

	delete indexer;
    delete indexMetaData;
	delete analyzer;
	delete schema;
}
Beispiel #2
0
// Read data from file, build the index, and save the index to disk
void buildIndex(string data_file, string index_dir)
{
    /// Set up the Schema
    Schema *schema = Schema::create(srch2is::DefaultIndex);
    schema->setPrimaryKey("primaryKey");
    schema->setSearchableAttribute("description", 2);
    schema->setScoringExpression("idf_score*doc_boost");

    /// Create an Analyzer
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "",
                                      srch2is::STANDARD_ANALYZER);

    /// Create an index writer
    unsigned mergeEveryNSeconds = 2;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites, index_dir);
    Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

    Record *record = new Record(schema);

    unsigned docsCounter = 0;
    string line;

    ifstream data(data_file.c_str());

    /// Read records from file
    /// the file should have two fields, seperated by '^'
    /// the first field is the primary key, the second field is a searchable attribute
    while(getline(data,line))
    {
        unsigned cellCounter = 0;
        stringstream  lineStream(line);
        string cell;

        while(getline(lineStream,cell,'^') && cellCounter < 3 )
        {
            if (cellCounter == 0)
            {
                record->setPrimaryKey(cell.c_str());
            }
            else if (cellCounter == 1)
            {
                record->setSearchableAttributeValue(0, cell);
            }
            else
            {
                float recordBoost = atof(cell.c_str());
                record->setRecordBoost(recordBoost);
            }

            cellCounter++;
        }

        indexer->addRecord(record, analyzer);

        docsCounter++;

        record->clear();
    }

    cout << "#Docs Read:" << docsCounter << endl;

    indexer->commit();
    indexer->save();

    cout << "Index saved." << endl;

    data.close();

    delete indexer;
    delete indexMetaData;
    delete analyzer;
    delete schema;
}
Beispiel #3
0
// Test using the circle range
void testCircleRange(string directoryName)
{
	// Create a schema
	Schema *schema = Schema::create(LocationIndex);
	schema->setPrimaryKey("list_id"); // integer, by default not searchable
	schema->setSearchableAttribute("title", 2); // searchable text
	schema->setSearchableAttribute("address", 7); // searchable text

    // Create an analyzer
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "");

    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
	CacheManager *cache = new CacheManager(134217728);
    IndexMetaData *indexMetaData = new IndexMetaData( cache,
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		directoryName);

	Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

	// Create five records of 8 attributes and add them to the index
	addGeoRecord(indexer, schema, analyzer, 0, "Tom Smith and Jack Lennon", "Yesterday Once More", 100.0, 100.0);
	addGeoRecord(indexer, schema, analyzer, 1, "George Harris", "Here comes the sun", 110.0, 110.0);
	addGeoRecord(indexer, schema, analyzer, 2, "George Harris", "Here comes the sun", 10.0, 10.0);
	addGeoRecord(indexer, schema, analyzer, 3, "George Harris", "Here comes the sun", -100.0, -100.0);
	addGeoRecord(indexer, schema, analyzer, 4, "George Harris", "Here comes the sun", -110.0, -110.0);
	addGeoRecord(indexer, schema, analyzer, 5, "George Harris", "Here comes the sun", -100.0, 100.0);
	addGeoRecord(indexer, schema, analyzer, 6, "George Harris", "Here comes the sun", 100.0, -100.0);
	addGeoRecord(indexer, schema, analyzer, 7, "George Harris", "Here comes the sun", 101.0, -101.0);

	// commit the index
    bool retval = indexer->commit();
	ASSERT( retval == 1 );
    (void)retval;

    // Storing results of the query and expected results
    vector<vector<unsigned>*> expectedResults;
    vector<vector<GeoElement*>*>  results;

    QueryEvaluatorRuntimeParametersContainer runTimeParameters;
    QueryEvaluator * queryEvaluator = new QueryEvaluator(indexer,&runTimeParameters );


    //Rectangle queryRange(pair(pair(-20,-20),pair(20,20)));
    Point point;
    point.x = 100; point.y = 100;
    Circle circle(point,30);
    boost::shared_ptr<QuadTreeRootNodeAndFreeLists> quadtree_ReadView;
    quadtree_ReadView = dynamic_cast<IndexReaderWriter *>(indexer)->getQuadTree_ReadView();
    QuadTreeNode *qt = quadtree_ReadView->root;
    qt->rangeQuery(results,circle);
    vector<unsigned> res;

    res.push_back(getExternalId(queryEvaluator,0));
    res.push_back(getExternalId(queryEvaluator,1));
    res.push_back(getExternalId(queryEvaluator,2));

    expectedResults.push_back(&res);

    verifyResults(results,expectedResults);

	delete indexer;
    delete indexMetaData;
	delete analyzer;
	delete schema;
}
void addRecords()
{
    ///Create Schema
    Schema *schema = Schema::create(srch2::instantsearch::DefaultIndex);
    schema->setPrimaryKey("article_id"); // integer, not searchable
    schema->setSearchableAttribute("article_id"); // convert id to searchable text
    schema->setSearchableAttribute("article_authors", 2); // searchable text
    schema->setSearchableAttribute("article_title", 7); // searchable text
    
    SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN);
    syn->init();
    Record *record = new Record(schema);
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, "");

    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    string INDEX_DIR = ".";
    IndexMetaData *indexMetaData = new IndexMetaData( NULL,
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		INDEX_DIR);
    Indexer *index = Indexer::create(indexMetaData, analyzer, schema);
    
    record->setPrimaryKey(1001);
    record->setSearchableAttributeValue("article_authors", "Tom Smith and Jack Lennon");
    record->setSearchableAttributeValue("article_title", "come Yesterday Once More");
    record->setRecordBoost(10);
    index->addRecord(record, analyzer);

    record->clear();
    record->setPrimaryKey(1008);
    record->setSearchableAttributeValue(0, "Jimi Hendrix");
    record->setSearchableAttributeValue(1, "Little wing");
    record->setRecordBoost(90);
    index->addRecord(record, analyzer);

    index->commit();
    //index->commit();
    //index->print_Index();

    std::cout << "print 1 $$$$$$$$$$$$$$" << std::endl;

    record->clear();
    record->setPrimaryKey(1007);
    record->setSearchableAttributeValue(0, "Jimaai Hendaarix");
    record->setSearchableAttributeValue(1, "Littaale waaing");
    record->setRecordBoost(90);
    index->addRecord(record, analyzer);

    //index->print_Index();

    std::cout << "print 2 $$$$$$$$$$$$$$" << std::endl;

    delete schema;
    delete record;
    delete analyzer;
    delete index;
    syn->free();
}
void test1()
{
    Schema *schema = Schema::create(srch2::instantsearch::DefaultIndex);

    schema->setPrimaryKey("article_id"); // integer, not searchable

    schema->setSearchableAttribute("article_id"); // convert id to searchable text
    schema->setSearchableAttribute("article_authors", 2); // searchable text
    schema->setSearchableAttribute("article_title", 7); // searchable text

    // create an analyzer
    SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN);
    syn->init();
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, "");
    
    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    string INDEX_DIR = "test";
    IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		INDEX_DIR);
    
    Indexer *index = Indexer::create(indexMetaData, analyzer, schema);
    Record *record = new Record(schema);
    char* authorsCharStar = new char[30];
    char* titleCharStar = new char[30];

    //generate random characers
    srand ( time(NULL) );
    // create a record of 3 attributes
    for (unsigned i = 0; i < 1000; i++)
    {
        record->setPrimaryKey(i + 1000);

        sprintf(authorsCharStar,"John %cLen%cnon",(rand() % 50)+65,(rand() % 10)+65);
        string authors = string(authorsCharStar);
        record->setSearchableAttributeValue("article_authors", authors);

        sprintf(titleCharStar,"Yesterday %cOnc%ce %cMore",
                (rand()%59)+65, (rand()%59)+65, (rand()%10)+65);
        string title = string(titleCharStar);
        record->setSearchableAttributeValue("article_title", title);

        record->setRecordBoost(rand() % 100);
        index->addRecord(record, analyzer);

        // for creating another record
        record->clear();
    }

    // build the index
    index->commit();

    //indexer->printNumberOfBytes();

    delete[] authorsCharStar;
    delete[] titleCharStar;
    delete record;
    delete index;
    delete analyzer;
    delete schema;
}
Indexer *buildIndex(string data_file, string index_dir, string expression)
{
    /// Set up the Schema
    Schema *schema = Schema::create(srch2is::DefaultIndex, srch2::instantsearch::POSITION_INDEX_FIELDBIT);
    schema->setPrimaryKey("id");
    schema->setSearchableAttribute("name", 2);
    schema->setSearchableAttribute("category", 1);
    schema->setScoringExpression(expression);

    /// Create an Analyzer
    SynonymContainer *syn = SynonymContainer::getInstance(string(""), SYNONYM_DONOT_KEEP_ORIGIN);
    ProtectedWordsContainer *prot = ProtectedWordsContainer::getInstance("");
    AnalyzerInternal *simpleAnlyzer = new StandardAnalyzer(NULL, NULL, prot, syn, string(""));

    Analyzer *analyzer = new Analyzer(NULL, NULL, prot, syn, "", srch2is::STANDARD_ANALYZER);

    /// Create an index writer
    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(),
    		mergeEveryNSeconds, mergeEveryMWrites,
    		updateHistogramEveryPMerges, updateHistogramEveryQWrites,
    		index_dir);
    Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

    Record *record = new Record(schema);

    unsigned docsCounter = 0;
    string line;

    ifstream data(data_file.c_str());

    /// Read records from file
    /// the file should have two fields, seperated by '^'
    /// the first field is the primary key, the second field is a searchable attribute
    while(getline(data,line))
    {
        unsigned cellCounter = 0;
        stringstream  lineStream(line);
        string cell;

        while(getline(lineStream,cell,'^') && cellCounter < 4 )
        {
            if (cellCounter == 0)
            {
                record->setPrimaryKey(cell.c_str());
            }
            else if (cellCounter == 1)
            {
                record->setSearchableAttributeValue(0, cell);
            }
            else if (cellCounter == 2)
            {
                record->setSearchableAttributeValue(1, cell);
            }
            else if (cellCounter == 3)
            {
                record->setRecordBoost(atof(cell.c_str()));
            }

            cellCounter++;
        }

        indexer->addRecord(record, analyzer);

        docsCounter++;

        record->clear();
    }

    cout << "#Docs Read:" << docsCounter << endl;

    indexer->commit();

    data.close();

    delete record;
    delete analyzer;
    delete schema;
    prot->free();
    syn->free();

    return indexer;
}