/* * hashbuild() -- build a new hash index. */ IndexBuildResult * hashbuild(Relation heap, Relation index, IndexInfo *indexInfo) { IndexBuildResult *result; BlockNumber relpages; double reltuples; double allvisfrac; uint32 num_buckets; HashBuildState buildstate; /* * We expect to be called exactly once for any index relation. If that's * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* Estimate the number of rows currently present in the table */ estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac); /* Initialize the hash index metadata page and initial buckets */ num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM); /* * If we just insert the tuples into the index in scan order, then * (assuming their hash codes are pretty random) there will be no locality * of access to the index, and if the index is bigger than available RAM * then we'll thrash horribly. To prevent that scenario, we can sort the * tuples by (expected) bucket number. However, such a sort is useless * overhead when the index does fit in RAM. We choose to sort if the * initial index size exceeds NBuffers. * * NOTE: this test will need adjustment if a bucket is ever different from * one page. */ if (num_buckets >= (uint32) NBuffers) buildstate.spool = _h_spoolinit(heap, index, num_buckets); else buildstate.spool = NULL; /* prepare to build the index */ buildstate.indtuples = 0; /* do the heap scan */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, hashbuildCallback, (void *) &buildstate); if (buildstate.spool) { /* sort the tuples and insert them into the index */ _h_indexbuild(buildstate.spool); _h_spooldestroy(buildstate.spool); } /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; return result; }
/* * hashbuild() -- build a new hash index. */ IndexBuildResult * hashbuild(Relation heap, Relation index, IndexInfo *indexInfo) { IndexBuildResult *result; BlockNumber relpages; double reltuples; double allvisfrac; uint32 num_buckets; long sort_threshold; HashBuildState buildstate; /* * We expect to be called exactly once for any index relation. If that's * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* Estimate the number of rows currently present in the table */ estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac); /* Initialize the hash index metadata page and initial buckets */ num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM); /* * If we just insert the tuples into the index in scan order, then * (assuming their hash codes are pretty random) there will be no locality * of access to the index, and if the index is bigger than available RAM * then we'll thrash horribly. To prevent that scenario, we can sort the * tuples by (expected) bucket number. However, such a sort is useless * overhead when the index does fit in RAM. We choose to sort if the * initial index size exceeds maintenance_work_mem, or the number of * buffers usable for the index, whichever is less. (Limiting by the * number of buffers should reduce thrashing between PG buffers and kernel * buffers, which seems useful even if no physical I/O results. Limiting * by maintenance_work_mem is useful to allow easy testing of the sort * code path, and may be useful to DBAs as an additional control knob.) * * NOTE: this test will need adjustment if a bucket is ever different from * one page. Also, "initial index size" accounting does not include the * metapage, nor the first bitmap page. */ sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ; if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP) sort_threshold = Min(sort_threshold, NBuffers); else sort_threshold = Min(sort_threshold, NLocBuffer); if (num_buckets >= (uint32) sort_threshold) buildstate.spool = _h_spoolinit(heap, index, num_buckets); else buildstate.spool = NULL; /* prepare to build the index */ buildstate.indtuples = 0; buildstate.heapRel = heap; /* do the heap scan */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, hashbuildCallback, (void *) &buildstate); if (buildstate.spool) { /* sort the tuples and insert them into the index */ _h_indexbuild(buildstate.spool, buildstate.heapRel); _h_spooldestroy(buildstate.spool); } /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; return result; }