/**
 * Builds a LocationIndex-schema index, feeds it through readRecordsFromFile()
 * using the path "<directoryName>/quadtree/1K", commits it and saves it to
 * directoryName.
 *
 * @param directoryName  directory handed to IndexMetaData and Indexer::save();
 *                       also the prefix of the input records path.
 */
void Serialize(string directoryName)
{
    // Schema: one primary key and two searchable text attributes.
    Schema *geoSchema = Schema::create(LocationIndex);
    geoSchema->setPrimaryKey("list_id");              // integer, by default not searchable
    geoSchema->setSearchableAttribute("title", 2);    // searchable text
    geoSchema->setSearchableAttribute("address", 7);  // searchable text

    // Analyzer with no optional components configured.
    Analyzer *textAnalyzer = new Analyzer(NULL, NULL, NULL, NULL, "");

    // Merge / histogram-update settings passed to the index metadata.
    const unsigned mergeIntervalSeconds = 3;
    const unsigned mergeIntervalWrites = 5;
    const unsigned histogramIntervalMerges = 1;
    const unsigned histogramIntervalWrites = 5;

    CacheManager *cacheManager = new CacheManager(134217728);
    IndexMetaData *metaData = new IndexMetaData(cacheManager,
                                                mergeIntervalSeconds,
                                                mergeIntervalWrites,
                                                histogramIntervalMerges,
                                                histogramIntervalWrites,
                                                directoryName);

    Indexer *geoIndexer = Indexer::create(metaData, textAnalyzer, geoSchema);

    const string recordsPath = directoryName + "/quadtree/1K";
    readRecordsFromFile(geoIndexer, geoSchema, textAnalyzer, recordsPath);

    // Grab the quadtree read view and its root before committing.
    // The root pointer is not used afterwards in this function.
    IndexReaderWriter *readerWriter = dynamic_cast<IndexReaderWriter *>(geoIndexer);
    boost::shared_ptr<QuadTreeRootNodeAndFreeLists> readView =
            readerWriter->getQuadTree_ReadView();
    QuadTreeNode *rootNode = readView->root;
    (void)rootNode;

    // Serialize the index.
    geoIndexer->commit();
    geoIndexer->save(directoryName);

    // Tear down: indexer first, then the objects it was created from.
    delete geoIndexer;
    delete metaData;
    delete textAnalyzer;
    delete geoSchema;
}
// Read data from file, build the index, and save the index to disk void buildIndex(string data_file, string index_dir) { /// Set up the Schema Schema *schema = Schema::create(srch2is::DefaultIndex); schema->setPrimaryKey("primaryKey"); schema->setSearchableAttribute("description", 2); schema->setScoringExpression("idf_score*doc_boost"); /// Create an Analyzer Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "", srch2is::STANDARD_ANALYZER); /// Create an index writer unsigned mergeEveryNSeconds = 2; unsigned mergeEveryMWrites = 5; unsigned updateHistogramEveryPMerges = 1; unsigned updateHistogramEveryQWrites = 5; IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(), mergeEveryNSeconds, mergeEveryMWrites, updateHistogramEveryPMerges, updateHistogramEveryQWrites, index_dir); Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema); Record *record = new Record(schema); unsigned docsCounter = 0; string line; ifstream data(data_file.c_str()); /// Read records from file /// the file should have two fields, seperated by '^' /// the first field is the primary key, the second field is a searchable attribute while(getline(data,line)) { unsigned cellCounter = 0; stringstream lineStream(line); string cell; while(getline(lineStream,cell,'^') && cellCounter < 3 ) { if (cellCounter == 0) { record->setPrimaryKey(cell.c_str()); } else if (cellCounter == 1) { record->setSearchableAttributeValue(0, cell); } else { float recordBoost = atof(cell.c_str()); record->setRecordBoost(recordBoost); } cellCounter++; } indexer->addRecord(record, analyzer); docsCounter++; record->clear(); } cout << "#Docs Read:" << docsCounter << endl; indexer->commit(); indexer->save(); cout << "Index saved." << endl; data.close(); delete indexer; delete indexMetaData; delete analyzer; delete schema; }
// Test using the circle range void testCircleRange(string directoryName) { // Create a schema Schema *schema = Schema::create(LocationIndex); schema->setPrimaryKey("list_id"); // integer, by default not searchable schema->setSearchableAttribute("title", 2); // searchable text schema->setSearchableAttribute("address", 7); // searchable text // Create an analyzer Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, ""); unsigned mergeEveryNSeconds = 3; unsigned mergeEveryMWrites = 5; unsigned updateHistogramEveryPMerges = 1; unsigned updateHistogramEveryQWrites = 5; CacheManager *cache = new CacheManager(134217728); IndexMetaData *indexMetaData = new IndexMetaData( cache, mergeEveryNSeconds, mergeEveryMWrites, updateHistogramEveryPMerges, updateHistogramEveryQWrites, directoryName); Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema); // Create five records of 8 attributes and add them to the index addGeoRecord(indexer, schema, analyzer, 0, "Tom Smith and Jack Lennon", "Yesterday Once More", 100.0, 100.0); addGeoRecord(indexer, schema, analyzer, 1, "George Harris", "Here comes the sun", 110.0, 110.0); addGeoRecord(indexer, schema, analyzer, 2, "George Harris", "Here comes the sun", 10.0, 10.0); addGeoRecord(indexer, schema, analyzer, 3, "George Harris", "Here comes the sun", -100.0, -100.0); addGeoRecord(indexer, schema, analyzer, 4, "George Harris", "Here comes the sun", -110.0, -110.0); addGeoRecord(indexer, schema, analyzer, 5, "George Harris", "Here comes the sun", -100.0, 100.0); addGeoRecord(indexer, schema, analyzer, 6, "George Harris", "Here comes the sun", 100.0, -100.0); addGeoRecord(indexer, schema, analyzer, 7, "George Harris", "Here comes the sun", 101.0, -101.0); // commit the index bool retval = indexer->commit(); ASSERT( retval == 1 ); (void)retval; // Storing results of the query and expected results vector<vector<unsigned>*> expectedResults; vector<vector<GeoElement*>*> results; QueryEvaluatorRuntimeParametersContainer 
runTimeParameters; QueryEvaluator * queryEvaluator = new QueryEvaluator(indexer,&runTimeParameters ); //Rectangle queryRange(pair(pair(-20,-20),pair(20,20))); Point point; point.x = 100; point.y = 100; Circle circle(point,30); boost::shared_ptr<QuadTreeRootNodeAndFreeLists> quadtree_ReadView; quadtree_ReadView = dynamic_cast<IndexReaderWriter *>(indexer)->getQuadTree_ReadView(); QuadTreeNode *qt = quadtree_ReadView->root; qt->rangeQuery(results,circle); vector<unsigned> res; res.push_back(getExternalId(queryEvaluator,0)); res.push_back(getExternalId(queryEvaluator,1)); res.push_back(getExternalId(queryEvaluator,2)); expectedResults.push_back(&res); verifyResults(results,expectedResults); delete indexer; delete indexMetaData; delete analyzer; delete schema; }
void addRecords() { ///Create Schema Schema *schema = Schema::create(srch2::instantsearch::DefaultIndex); schema->setPrimaryKey("article_id"); // integer, not searchable schema->setSearchableAttribute("article_id"); // convert id to searchable text schema->setSearchableAttribute("article_authors", 2); // searchable text schema->setSearchableAttribute("article_title", 7); // searchable text SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN); syn->init(); Record *record = new Record(schema); Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, ""); unsigned mergeEveryNSeconds = 3; unsigned mergeEveryMWrites = 5; unsigned updateHistogramEveryPMerges = 1; unsigned updateHistogramEveryQWrites = 5; string INDEX_DIR = "."; IndexMetaData *indexMetaData = new IndexMetaData( NULL, mergeEveryNSeconds, mergeEveryMWrites, updateHistogramEveryPMerges, updateHistogramEveryQWrites, INDEX_DIR); Indexer *index = Indexer::create(indexMetaData, analyzer, schema); record->setPrimaryKey(1001); record->setSearchableAttributeValue("article_authors", "Tom Smith and Jack Lennon"); record->setSearchableAttributeValue("article_title", "come Yesterday Once More"); record->setRecordBoost(10); index->addRecord(record, analyzer); record->clear(); record->setPrimaryKey(1008); record->setSearchableAttributeValue(0, "Jimi Hendrix"); record->setSearchableAttributeValue(1, "Little wing"); record->setRecordBoost(90); index->addRecord(record, analyzer); index->commit(); //index->commit(); //index->print_Index(); std::cout << "print 1 $$$$$$$$$$$$$$" << std::endl; record->clear(); record->setPrimaryKey(1007); record->setSearchableAttributeValue(0, "Jimaai Hendaarix"); record->setSearchableAttributeValue(1, "Littaale waaing"); record->setRecordBoost(90); index->addRecord(record, analyzer); //index->print_Index(); std::cout << "print 2 $$$$$$$$$$$$$$" << std::endl; delete schema; delete record; delete analyzer; delete index; syn->free(); }
void test1() { Schema *schema = Schema::create(srch2::instantsearch::DefaultIndex); schema->setPrimaryKey("article_id"); // integer, not searchable schema->setSearchableAttribute("article_id"); // convert id to searchable text schema->setSearchableAttribute("article_authors", 2); // searchable text schema->setSearchableAttribute("article_title", 7); // searchable text // create an analyzer SynonymContainer *syn = SynonymContainer::getInstance("", SYNONYM_DONOT_KEEP_ORIGIN); syn->init(); Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, syn, ""); unsigned mergeEveryNSeconds = 3; unsigned mergeEveryMWrites = 5; unsigned updateHistogramEveryPMerges = 1; unsigned updateHistogramEveryQWrites = 5; string INDEX_DIR = "test"; IndexMetaData *indexMetaData = new IndexMetaData( new CacheManager(), mergeEveryNSeconds, mergeEveryMWrites, updateHistogramEveryPMerges, updateHistogramEveryQWrites, INDEX_DIR); Indexer *index = Indexer::create(indexMetaData, analyzer, schema); Record *record = new Record(schema); char* authorsCharStar = new char[30]; char* titleCharStar = new char[30]; //generate random characers srand ( time(NULL) ); // create a record of 3 attributes for (unsigned i = 0; i < 1000; i++) { record->setPrimaryKey(i + 1000); sprintf(authorsCharStar,"John %cLen%cnon",(rand() % 50)+65,(rand() % 10)+65); string authors = string(authorsCharStar); record->setSearchableAttributeValue("article_authors", authors); sprintf(titleCharStar,"Yesterday %cOnc%ce %cMore", (rand()%59)+65, (rand()%59)+65, (rand()%10)+65); string title = string(titleCharStar); record->setSearchableAttributeValue("article_title", title); record->setRecordBoost(rand() % 100); index->addRecord(record, analyzer); // for creating another record record->clear(); } // build the index index->commit(); //indexer->printNumberOfBytes(); delete[] authorsCharStar; delete[] titleCharStar; delete record; delete index; delete analyzer; delete schema; }
/**
 * Reads records from a text file, builds and commits an index from them, and
 * returns the indexer.
 *
 * Each input line holds up to four '^'-separated fields:
 *   1. the primary key,
 *   2. the value of searchable attribute #0,
 *   3. the value of searchable attribute #1,
 *   4. the record boost (parsed with atof).
 *
 * @param data_file   path of the input file; if it cannot be read, no records
 *                    are added and an empty index is committed.
 * @param index_dir   directory handed to IndexMetaData.
 * @param expression  scoring expression set on the schema.
 * @return the committed indexer; the caller is responsible for deleting it.
 */
Indexer *buildIndex(string data_file, string index_dir, string expression)
{
    /// Set up the Schema
    Schema *schema = Schema::create(srch2is::DefaultIndex, srch2::instantsearch::POSITION_INDEX_FIELDBIT);
    schema->setPrimaryKey("id");
    schema->setSearchableAttribute("name", 2);
    schema->setSearchableAttribute("category", 1);
    schema->setScoringExpression(expression);

    /// Create an Analyzer
    // The extra StandardAnalyzer that used to be allocated here was never
    // used or deleted, so it has been dropped.
    SynonymContainer *syn = SynonymContainer::getInstance(string(""), SYNONYM_DONOT_KEEP_ORIGIN);
    ProtectedWordsContainer *prot = ProtectedWordsContainer::getInstance("");
    Analyzer *analyzer = new Analyzer(NULL, NULL, prot, syn, "", srch2is::STANDARD_ANALYZER);

    /// Create an index writer
    unsigned mergeEveryNSeconds = 3;
    unsigned mergeEveryMWrites = 5;
    unsigned updateHistogramEveryPMerges = 1;
    unsigned updateHistogramEveryQWrites = 5;
    // NOTE(review): indexMetaData is not freed in this function because the
    // returned indexer is still alive; confirm who is expected to release it.
    IndexMetaData *indexMetaData = new IndexMetaData(new CacheManager(),
                                                     mergeEveryNSeconds,
                                                     mergeEveryMWrites,
                                                     updateHistogramEveryPMerges,
                                                     updateHistogramEveryQWrites,
                                                     index_dir);
    Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

    // A single Record is reused for every line and cleared after each insert.
    Record *record = new Record(schema);

    unsigned docsCounter = 0;
    string line;
    ifstream data(data_file.c_str());

    /// Read records from file, one record per line
    while (getline(data, line)) {
        unsigned cellCounter = 0;
        stringstream lineStream(line);
        string cell;

        while (getline(lineStream, cell, '^') && cellCounter < 4) {
            if (cellCounter == 0) {
                record->setPrimaryKey(cell.c_str());
            } else if (cellCounter == 1) {
                record->setSearchableAttributeValue(0, cell);
            } else if (cellCounter == 2) {
                record->setSearchableAttributeValue(1, cell);
            } else if (cellCounter == 3) {
                record->setRecordBoost(atof(cell.c_str()));
            }
            cellCounter++;
        }

        indexer->addRecord(record, analyzer);
        docsCounter++;
        record->clear();
    }

    cout << "#Docs Read:" << docsCounter << endl;

    indexer->commit();

    data.close();

    delete record;
    delete analyzer;
    delete schema;
    prot->free();
    syn->free();

    return indexer;
}