void GiST::InsertHelper(const GiSTentry &entry, int level, // level of tree at which to insert int *splitvec) // a vector to trigger Split instead of forced reinsert { GiSTnode *leaf; int overflow=0; leaf=ChooseSubtree(GiSTRootPage, entry, level); leaf->Insert(entry); if (leaf->IsOverFull(*store)) { if(ForcedReinsert()&&!leaf->Path().IsRoot()&&(!splitvec||!splitvec[level])) { int split[GIST_MAX_LEVELS]; // R*-tree-style forced reinsert for(int i=0; i<GIST_MAX_LEVELS; i++) split[i]=0; OverflowTreatment(leaf, entry, split); overflow=1; } else Split(&leaf, entry); if(leaf->IsOverFull(*store)) { // we only should get here if we reinserted, and the node re-filled assert(overflow); leaf->DeleteEntry(entry.Position()); Split(&leaf, entry); } } else WriteNode(leaf); if(!overflow) AdjustKeys(leaf, NULL); delete leaf; }
// Writes `node` and makes the entry that points to it in its parent equal to
// the node's current union, recursing upwards while parent entries change.
//   node   - the node whose parent entry must be brought up to date; nothing
//            is done (and nothing is written) when it is the root
//   parent - optional in/out pointer to the already-loaded parent of `node`.
//            When NULL the parent is read here and deleted before returning.
//            When non-NULL the caller keeps ownership; if the parent has to
//            be split, *parent is whatever node Split() leaves there.
void GiST::AdjustKeys (GiSTnode *node, GiSTnode **parent)
{
    if (node->Path().IsRoot()) {
        return;
    }

    GiSTnode *P;

    // Read in node's parent
    if (parent == NULL) {
        GiSTpath parent_path = node->Path();
        parent_path.MakeParent ();
        P = ReadNode (parent_path);
        parent = &P;
    } else {
        P = *parent;
    }

    // Get the old entry pointing to node
    GiSTentry *entry = P->SearchPtr(node->Path().Page());
    assert (entry != NULL);

    // Get union of node
    GiSTentry *actual = node->Union();
    // original author's note on this write: "added by myself for the splitted = false"
    WriteNode(node);
    actual->SetPtr(node->Path().Page());

    if (!entry->IsEqual(*actual)) {
        // Replace the stale parent entry in place.
        int pos = entry->Position();
        P->DeleteEntry(pos);
        P->InsertBefore(*actual, pos);

        // A split may be necessary.
        // XXX: should we do Forced Reinsert here too?
        if (P->IsOverFull(*store)) {
            Split (parent, *actual);

            // Split() may delete the node it was handed and store the other
            // half in *parent. When the caller supplied `parent`, P is only a
            // copy of the old pointer and may now dangle, so the new parent
            // path must be taken from *parent, never from P.
            GiSTpage page = node->Path().Page();
            node->Path() = (*parent)->Path();
            node->Path().MakeChild(page);
        } else {
            WriteNode (P);
            AdjustKeys (P, NULL);
        }
    }

    // Only free the parent if it was read in this call (parent aliases P).
    if (parent == &P) {
        delete P;
    }
    delete actual;
    delete entry;
}
// Handles an overfull MX-tree node. TestPromotion() and Trade() decide
// between two outcomes:
//   - Trade() returns true: the node is not split; its pages are released, a
//     run one page longer is allocated, and the node is rewritten there (the
//     parent entry is re-pointed if the start page changed).
//   - otherwise: the node is split with PickSplit() and both halves are
//     registered in the parent (or in a newly created root).
//   node  - in/out; after a real split *node is the half for which
//           SearchPtr(entry.Ptr()) succeeded on the original node, else the
//           new sibling (the other half is deleted)
//   entry - the entry whose insertion caused the overflow
void MXTree::Split(GiSTnode **node, const GiSTentry& entry)
{
    double promoRadii[2];
    double promoDist;
    double *distTable = new double[(*node)->NumEntries()*2];
    int promoPages[2];
    int candidates[2];
    vector<vector<int>> partitions(2);

    ((MXTnode *)(*node))->TestPromotion(promoRadii, &promoDist, promoPages, candidates, distTable, partitions);

    const bool keepWhole = Trade((*node)->Path().IsRoot(), promoRadii, promoDist, promoPages,
                                 ((MXTnode *)(*node))->GetPageNum()+1, (*node)->NumEntries());

    if (keepWhole) {
        // don't split now: move the node to a run that is one page longer
        delete[] distTable;

        GiSTpath previousPath = (*node)->Path();
        int firstPage = ((*node)->Path().IsRoot() ? rootPage : (*node)->Path().Page());
        int span = ((MXTnode *)(*node))->GetPageNum();
        ((MXTfile *)store)->Deallocate(firstPage, span);
        ++span;
        firstPage = ((MXTfile *)store)->Allocate(span);
        (*node)->Path().MakeSibling(firstPage);
        if ((*node)->Path().IsRoot()) {
            rootPage = firstPage;
        }
        ((MXTnode *)(*node))->SetPageNum(span);
        WriteNode(*node);

        // Nothing more to do for the root, or when the start page is unchanged.
        if ((*node)->Path().IsRoot() || firstPage == previousPath.Page()) {
            return;
        }

        // Re-point the parent's entry at the node's new start page.
        GiSTpath upPath = previousPath;
        upPath.MakeParent();
        GiSTnode *upNode = ReadNode(upPath);
        GiSTentry *link = upNode->SearchPtr(previousPath.Page());
        assert(link != NULL);
        int slot = link->Position();
        link->SetPtr(firstPage);
        upNode->DeleteEntry(slot);
        upNode->InsertBefore(*link, slot);
        WriteNode(upNode);
        delete upNode;
        delete link;
        return;
    }

    // split now
    const bool madeNewRoot = (*node)->Path().IsRoot();
    if (madeNewRoot) {
        (*node)->Path().MakeChild(rootPage);
        rootPage = store->Allocate();
    }

    int pagesBefore = ((MXTnode *)(*node))->GetPageNum();
    GiSTnode *sibling = ((MXTnode *)(*node))->PickSplit(candidates, distTable, partitions);
    delete[] distTable;
    int pagesAfter = ((MXTnode *)(*node))->GetPageNum();
    assert(pagesBefore >= pagesAfter);
    if (pagesBefore > pagesAfter) {
        // give back the tail pages the shrunken node no longer uses
        ((MXTfile *)store)->Deallocate((*node)->Path().Page()+pagesAfter, pagesBefore-pagesAfter);
    }
    sibling->Path().MakeSibling(((MXTfile *)store)->Allocate(((MXTnode *)sibling)->GetPageNum()));
    WriteNode(*node);
    WriteNode(sibling);

    // Did the triggering entry stay in the original (left) node?
    GiSTentry *probe = (*node)->SearchPtr(entry.Ptr());
    const bool stayedLeft = (probe != NULL);
    delete probe;

    GiSTentry *leftKey = (*node)->Union();
    GiSTentry *rightKey = sibling->Union();
    leftKey->SetPtr((*node)->Path().Page());
    rightKey->SetPtr(sibling->Path().Page());

    if (madeNewRoot) {
        // Create new root if root is being split
        GiSTnode *freshRoot = NewNode(this);
        freshRoot->SetLevel((*node)->Level() + 1);
        freshRoot->InsertBefore(*leftKey, 0);
        freshRoot->InsertBefore(*rightKey, 1);
        freshRoot->Path().MakeRoot();
        WriteNode(freshRoot);
        delete freshRoot;
    } else {
        // Insert entry for N' in parent
        GiSTpath upPath = (*node)->Path();
        upPath.MakeParent();
        GiSTnode *upNode = ReadNode(upPath);

        // Find the entry for N in parent
        GiSTentry *oldLink = upNode->SearchPtr((*node)->Path().Page());
        assert(oldLink != NULL);

        // Replace it with the two new entries, N' right after N
        int slot = oldLink->Position();
        upNode->DeleteEntry(slot);
        upNode->InsertBefore(*leftKey, slot);
        upNode->InsertBefore(*rightKey, slot+1);
        delete oldLink;

        if (upNode->IsOverFull(*store)) {
            // upNode comes back as the node which contains the entry inserted
            Split(&upNode, stayedLeft ? *leftKey : *rightKey);

            GiSTpage page = (*node)->Path().Page();
            (*node)->Path() = upNode->Path();  // parent's path may change
            (*node)->Path().MakeChild(page);
            page = sibling->Path().Page();
            sibling->Path() = (*node)->Path();
            sibling->Path().MakeSibling(page);
        } else {
            WriteNode(upNode);
        }
        delete upNode;
    }

    if (stayedLeft) {
        delete sibling;
    } else {
        delete *node;
        *node = sibling;  // return it
    }
    delete leftKey;
    delete rightKey;
}
// Load this M-tree with n data using the BulkLoad algorithm [CP98].
//   data      - array of n entries
//   padFactor - maximum node utilization (use 1)
//   name      - name of the tree; also used as the prefix of the temporary
//               files "<name>.<k>" that are created and unlinked below
//
// When the estimated size of all entries fits in one page they are inserted
// into the node read at the root path. Otherwise the entries are clustered
// around randomly picked samples, a sub-tree is bulk-loaded recursively for
// each cluster into a temporary file, a "super tree" is bulk-loaded over the
// cluster parents, and the sub-trees are appended under its leaves.
//
// NOTE(review): exits the process (exit(1)) when the sampling loop reaches
// MAXITER iterations.
// NOTE(review): newName is a fixed char[50] filled with sprintf/strcpy; a
// long `name` would overflow it -- confirm callers keep names short.
void MT::BulkLoad (MTentry **data, int n, double padFactor, const char *name)
{
    // estimate the space needed to store all the entries
    int size = 0;
    if (EntrySize()) {
        size = n * (sizeof(GiSTpage) + EntrySize()); // (only valid if we've fixed size entries)
    } else {
        for (int i=0; i<n; i++) {
            size += sizeof(GiSTlte) + sizeof(GiSTpage) + data[i]->CompressedLength();
        }
    }
    int totSize = size + GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte);
    if (totSize > Store()->PageSize()) { // we need to split the entries into several sub-trees
        // NOTE(review): numEntries is used as a divisor on the next line; presumably it is never 0 here -- confirm
        int numEntries = (int)(Store()->PageSize()*padFactor*n) / totSize;
        int s = (int) MAX (MIN (numEntries, ceil(((float)n)/numEntries)), numEntries*MIN_UTIL); // initial number of samples
        int nSamples, *samples = new int[s], *sizes = NULL, *ns = NULL, iter = 0, MAXITER = s * s;
        // distm is zero-filled raw memory from calloc (no constructors run); it is released with free() below
        GiSTlist<double *> *distm = (GiSTlist<double *> *) calloc (s, sizeof(GiSTlist<double *>)); // relative distances between samples
        int MINSIZE = (int) (Store()->PageSize()*MIN_UTIL), addEntrySize = EntrySize() ? sizeof(GiSTpage) : sizeof(GiSTlte)+sizeof(GiSTpage);
        GiSTlist<int> *lists = NULL; // for each sample, the indices (into data) of its members
        GiSTlist<double> *dists = NULL; // for each sample, the distance between the sample and each member
        BOOL *bSampled = new BOOL[n]; // is this entry in the samples set?

        // sampling phase
        do {
            iter++;
            if (iter > 1) { // this is a new sampling phase: release the state of the previous one
                // only index 0 is drained: the loop repeats only when nSamples == 1
                while (!lists[0].IsEmpty()) {
                    lists[0].RemoveFront ();
                    dists[0].RemoveFront ();
                }
                delete []lists;
                delete []dists;
                delete []sizes;
                delete []ns;
                while (!distm[0].IsEmpty()) {
                    delete []distm[0].RemoveFront(); // empty the distance list
                }
                for (int i=1; i<s; i++) {
                    distm[i].front = distm[i].rear = NULL;
                }
            }
            if (iter >= MAXITER) {
                cout << "Too many loops in BulkLoad!"<<endl<<"Please select a lower minimum node utilization or a bigger node size."<<endl;
                exit(1);
            }
            for (int i=0; i<n; i++) {
                bSampled[i] = FALSE;
            }
            nSamples = 0;
            // pick s samples to create parents
            while (nSamples < s) {
                int i;
                do {
                    i = PickRandom (0, n);
                } while (bSampled[i]); // retry until an entry that is not yet a sample is drawn
                bSampled[i] = TRUE;
                samples[nSamples++] = i;
            }
            lists = new GiSTlist<int>[s];
            dists = new GiSTlist<double>[s];
            sizes = new int[s];
            ns = new int[s];
            for (int i=0; i<s; i++) {
                sizes[i] = GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte);
                ns[i] = 1; // the sample itself is counted as a member of its own set
                distm[i].Prepend (new double[s]);
            }
            // compute the relative distances between samples (symmetric, zero diagonal)
            for (int i=0; i<s; i++) {
                for (int j=0; j<i; j++) {
                    distm[j].front->entry[i] = distm[i].front->entry[j] = data[samples[j]]->object().distance(data[samples[i]]->object());
                }
                distm[i].front->entry[i] = 0;
            }
            // assign each entry to its nearest parent
            for (int i=0; i<n; i++) {
                if (bSampled[i]) {
                    int j = 0;
                    for (; samples[j]!=i; j++); // find the position of this entry in the samples set
                    lists[j].Prepend (i); // insert the entry in the right sample
                    dists[j].Prepend (0); // distance between sample and data[i]
                    sizes[j] += addEntrySize + data[i]->CompressedLength();
                } else { // here we optimize the distance computations (like we do in the insert algorithm)
                    double *dist = new double[s]; // distance between this non-sample and samples
                    dist[0] = data[samples[0]]->object().distance(data[i]->object());
                    int minIndex = 0;
                    for (int j=1; j<s; j++) { // seek the nearest sample
                        dist[j] = -MaxDist(); // negative value marks "not computed"
                        if (fabs (data[samples[j]]->Key()->distance - data[i]->Key()->distance) >= dist[minIndex]) { // pruning
                            continue;
                        }
                        BOOL flag = TRUE;
                        for (int k=0; k<j && flag; k++) { // pruning (other samples)
                            if (dist[k] < 0) {
                                continue;
                            } else {
                                flag = fabs (dist[k] - distm[j].front->entry[k]) < dist[minIndex];
                            }
                        }
                        if (!flag) {
                            continue;
                        }
                        dist[j] = data[samples[j]]->object().distance(data[i]->object()); // have to compute this distance
                        if (dist[j] < dist[minIndex]) {
                            minIndex = j;
                        }
                    }
                    lists[minIndex].Append (i); // insert the entry in the right sample
                    dists[minIndex].Append (dist[minIndex]); // distance between sample and data[i]
                    sizes[minIndex] += addEntrySize + data[i]->CompressedLength();
                    ns[minIndex]++;
                    // the distance vector is kept only while the set is still below MINSIZE
                    sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist); // correspond with lists
                }
            }
            // redistribute underfilled parents
            int i;
            while (sizes[i = FindMin (sizes, nSamples)] < MINSIZE) {
                GiSTlist<int> list = lists[i]; // members of the set being dissolved
                while (!dists[i].IsEmpty()) { // clear distance between each sample and its members
                    dists[i].RemoveFront ();
                }
                // substitute this set with last set
                for (int j=0; j<nSamples; j++) {
                    for (GiSTlistnode<double *> *node=distm[j].front; node; node=node->next) {
                        node->entry[i] = node->entry[nSamples-1];
                    }
                }
                GiSTlist<double *> dlist = distm[i]; // distance vectors of the dissolved set (original note: "reposition by myself")
                distm[i] = distm[nSamples-1];
                lists[i] = lists[nSamples-1];
                dists[i] = dists[nSamples-1];
                samples[i] = samples[nSamples-1];
                sizes[i] = sizes[nSamples-1];
                ns[i] = ns[nSamples-1];
                nSamples--;
                while (!list.IsEmpty()) { // assign each entry to its nearest parent
                    double *dist = dlist.RemoveFront (); // relative distances between sample[i] (old) and other samples (old)
                    int minIndex = -1;
                    for (int j=0; j<nSamples && minIndex<0; j++) { // search for a computed distance
                        if (dist[j] > 0) {
                            minIndex = j;
                        }
                    }
                    int k = list.RemoveFront ();
                    if (minIndex < 0) { // no distance was computed (i.e. all distances were pruned)
                        dist[0] = data[samples[0]]->object().distance(data[k]->object());
                        minIndex = 0;
                    }
                    for (int j=0; j<nSamples; j++) {
                        if (j == minIndex) {
                            continue;
                        }
                        if (dist[j] < 0) { // distance wasn't computed
                            if (fabs (data[samples[j]]->Key()->distance - data[k]->Key()->distance) >= dist[minIndex]) {
                                continue; // pruning
                            }
                            BOOL flag = TRUE;
                            // note: this inner i shadows the outer set index i
                            for (int i=0; i<j && flag; i++) { // pruning (other samples)
                                if (dist[i] < 0) {
                                    continue;
                                } else {
                                    flag = fabs (dist[i] - distm[j].front->entry[i]) < dist[minIndex];
                                }
                            }
                            if (!flag) {
                                continue;
                            }
                            dist[j] = data[samples[j]]->object().distance(data[k]->object()); // have to compute this distance
                        }
                        if (dist[j] < dist[minIndex]) {
                            minIndex = j;
                        }
                    }
                    lists[minIndex].Append (k);
                    dists[minIndex].Append (dist[minIndex]);
                    sizes[minIndex] += addEntrySize + data[k]->CompressedLength();
                    ns[minIndex]++;
                    sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist); // correspond with lists
                }
                assert (dlist.IsEmpty()); // so is the list
            }
        } while (nSamples == 1); // if there's only one child, repeat the sampling phase

        MTentry ***array = new MTentry **[nSamples]; // array of the entries for each sub-tree
        for (int i=0; i<nSamples; i++) { // convert the lists into arrays
            array[i] = new MTentry *[ns[i]];
            for (int j=0; j<ns[i]; j++) {
                array[i][j] = (MTentry *) data[lists[i].RemoveFront ()]->Copy();
                array[i][j]->Key()->distance = dists[i].RemoveFront ();
            }
            assert (lists[i].IsEmpty());
            assert (dists[i].IsEmpty());
        }
        delete []lists;
        delete []dists;
        delete []sizes;
        delete []bSampled;
        for (int i=0; i<nSamples; i++) {
            while (!distm[i].IsEmpty()) {
                delete [](distm[i].RemoveFront());
            }
        }
        free (distm);

        // build an M-tree under each parent
        int nInit = nSamples;
        MT *subtree = new MT;
        GiSTlist<char *> subtreeNames; // list of the subtrees names
        GiSTlist<MTentry *> topEntries; // list of the parent entries of each subtree
        int nCreated = 0, minHeight = MAXINT;
        char newName[50];
        for (int i=0; i<nInit; i++) {
            sprintf (newName, "%s.%i", name, ++nCreated);
            unlink (newName);
            subtree->Create(newName); // create the new subtree
            subtree->BulkLoad(array[i], ns[i], padFactor, newName); // build the subtree
            GiSTpath path;
            path.MakeRoot ();
            MTnode *subtreeRoot = (MTnode *) subtree->ReadNode(path);
            if (subtreeRoot->IsUnderFull(*Store())) { // if the subtree root node is underfilled, we have to split the tree
                GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>;
                GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, subtree->TreeHeight()-1, parentEntries, name); // split the tree
                nSamples--; // this sub-tree is replaced by the trees SplitTree returned
                while (!newTreeNames->IsEmpty()) { // insert all the new trees in the subtrees list
                    subtreeNames.Append (newTreeNames->RemoveFront());
                    MTentry *entry = parentEntries->RemoveFront();
                    for (int j=0; j<n; j++) {
                        if (data[j]->object() == entry->object()) { // append the parent entry to the list
                            topEntries.Append (data[j]);
                            break;
                        }
                    }
                    delete entry;
                    nSamples++;
                }
                delete newTreeNames;
                delete parentEntries;
                minHeight = MIN (minHeight, subtree->TreeHeight()-1);
            } else {
                // NOTE(review): strdup() memory is released with delete[] further down -- confirm intended (free() is the matching call)
                subtreeNames.Append (strdup(newName));
                topEntries.Append (data[samples[i]]);
                minHeight = MIN (minHeight, subtree->TreeHeight());
            }
            delete subtreeRoot;
            subtree->Close();
            delete subtree->Store(); // it was created in subtree->Create()
        }
        delete []samples;
        for (int i=0; i<nInit; i++) {
            for (int j=0; j<ns[i]; j++) {
                delete array[i][j];
            }
            delete []array[i];
        }
        delete []array;
        delete []ns;

        // fix the subtree height
        GiSTlist<char *> subtreeNames2; // list of the subtrees names
        GiSTlist<MTentry *> topEntries2; // list of the parent entries of each subtree
        while (!topEntries.IsEmpty()) { // insert the trees in the list (splitting trees if necessary)
            MTentry *parentEntry = topEntries.RemoveFront ();
            char *tmp = subtreeNames.RemoveFront ();
            strcpy (newName, tmp);
            delete []tmp; // NOTE(review): tmp may come from strdup() above -- see note there
            subtree->Open(newName);
            if (subtree->TreeHeight() > minHeight) { // we have to split the tree to reduce its height
                nSamples--;
                GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>;
                GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, minHeight, parentEntries, name); // split the tree
                while (!newTreeNames->IsEmpty()) { // insert all the new trees in the subtrees list
                    subtreeNames2.Append (newTreeNames->RemoveFront());
                    MTentry *entry = parentEntries->RemoveFront();
                    for (int j=0; j<n; j++) {
                        if (data[j]->object() == entry->object()) { // append the parent entry to the parents list
                            topEntries2.Append (data[j]);
                            break;;
                        }
                    }
                    delete entry;
                    nSamples++;
                }
                delete newTreeNames;
                delete parentEntries;
            } else { // simply insert the tree and its parent entry to the lists
                subtreeNames2.Append (strdup(newName));
                topEntries2.Append (parentEntry);
            }
            subtree->Close();
            delete subtree->Store(); // it was created in subtree->Open()
        }

        // build the super tree upon the parents
        MTentry **topEntrArr = new MTentry *[nSamples]; // array of the parent entries for each subtree
        char **subNameArr = new char *[nSamples]; // array of the subtrees names
        for (int i=0; i<nSamples; i++) { // convert the lists into arrays
            topEntrArr[i] = topEntries2.RemoveFront ();
            subNameArr[i] = subtreeNames2.RemoveFront ();
        }
        assert (topEntries2.IsEmpty());
        assert (subtreeNames2.IsEmpty());
        sprintf (newName, "%s.0", name);
        BulkLoad (topEntrArr, nSamples, padFactor, newName);

        // attach each subtree to the leaves of the super tree
        GiSTpath path;
        path.MakeRoot ();
        MTnode *node = (MTnode *) ReadNode (path);
        GiSTlist<MTnode *> *oldList = new GiSTlist<MTnode *>; // upper level nodes
        oldList->Append(node);
        int level = node->Level();
        while (level > 0) { // build the leaves list for super tree
            GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; // lower level nodes
            while (!oldList->IsEmpty()) {
                node = oldList->RemoveFront();
                path = node->Path();
                node->SetLevel(node->Level() + minHeight); // update level of the upper nodes of the super tree
                WriteNode (node);
                for (int i=0; i<node->NumEntries(); i++) {
                    MTentry *entry = (MTentry *) (*node)[i].Ptr();
                    path.MakeChild (entry->Ptr());
                    newList->Append((MTnode *)ReadNode(path));
                    path.MakeParent ();
                }
                delete node;
            }
            delete oldList;
            oldList = newList;
            level--;
        }
        while (!oldList->IsEmpty()) { // attach each subtree to its leaf
            node = oldList->RemoveFront(); // retrieve next leaf (root of subtree)
            node->SetLevel(minHeight); // update level of the root of the subtree
            path = node->Path();
            for (int i=0; i<node->NumEntries(); i++) {
                MTentry *entry = (MTentry *) (*node)[i].Ptr();
                path.MakeChild(Store()->Allocate());
                MTnode *newNode = (MTnode *) CreateNode ();
                newNode->Path() = path;
                entry->SetPtr(path.Page());
                path.MakeParent ();
                int j = 0;
                for (; entry->object() != topEntrArr[j]->object(); j++); // find the subtree whose parent entry matches this one
                subtree->Open(subNameArr[j]);
                GiSTpath rootPath;
                rootPath.MakeRoot ();
                Append (newNode, (MTnode *)subtree->ReadNode(rootPath)); // append this subtree to the super tree
                subtree->Close();
                delete subtree->Store(); // it was created in subtree->Open()
                delete newNode;
            }
            WriteNode (node);
            delete node;
        }
        subtree->Open(subNameArr[0]); // in order to destroy the object tree
        delete subtree;
        for (int i=0; i<nSamples; i++) {
            delete []subNameArr[i]; // NOTE(review): some names were allocated with strdup() -- see note above
        }
        delete []subNameArr;
        delete []topEntrArr;

        // update radii of the upper nodes of the result M-tree
        path.MakeRoot ();
        node = (MTnode *) ReadNode (path);
        oldList->Append(node); // oldList is empty at this point and is reused
        level = node->Level();
        while (level >= minHeight) { // build the list of the nodes which radii should be recomputed
            GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>;
            while (!oldList->IsEmpty()) {
                node = oldList->RemoveFront();
                path = node->Path();
                for (int i=0; i<node->NumEntries(); i++) {
                    path.MakeChild ((*node)[i].Ptr()->Ptr());
                    newList->Append((MTnode *)ReadNode(path));
                    path.MakeParent ();
                }
                delete node;
            }
            delete oldList;
            oldList = newList;
            level--;
        }
        while (!oldList->IsEmpty()) { // adjust the radii of the nodes
            MTnode *node = oldList->RemoveFront();
            AdjKeys (node);
            delete node;
        }
        delete oldList;
        for (int i=0; i<=nCreated; i++) { // delete all temporary subtrees ("<name>.0" .. "<name>.<nCreated>")
            sprintf (newName, "%s.%i", name, i);
            unlink (newName);
        }
    } else { // we can insert all the entries in a single node
        GiSTpath path;
        path.MakeRoot ();
        GiSTnode *node = ReadNode (path);
        for (int i=0; i<n; i++) {
            node->Insert(*(data[i]));
        }
        assert (!node->IsOverFull(*Store()));
        WriteNode (node);
        delete node;
    }
}
// Splits *node in two with PickSplit(), writes both halves, and replaces the
// parent's entry for the node by the unions of the two halves. A new root is
// written when the node being split is the root; the parent is split
// recursively when it becomes overfull.
//   node  - in/out; on return *node is the half for which
//           SearchPtr(entry.Ptr()) succeeded on the original node, else the
//           new sibling. The other half is deleted.
//   entry - the entry used to decide which half is handed back
void GiST::Split (GiSTnode **node, const GiSTentry& entry)
{
    // When the root is split its contents move to a newly allocated page.
    const bool splittingRoot = (*node)->Path().IsRoot();
    if (splittingRoot) {
        (*node)->Path().MakeChild(store->Allocate());
    }

    GiSTnode *right = (*node)->PickSplit();
    right->Path().MakeSibling(store->Allocate());

    // Is the entry still reachable from the original (left) node?
    GiSTentry *probe = (*node)->SearchPtr(entry.Ptr());
    const bool keptInLeft = (probe != NULL);
    delete probe;

    // Chain the new node in after the original one.
    right->SetSibling((*node)->Sibling());
    (*node)->SetSibling(right->Path().Page());
    WriteNode (*node);
    WriteNode (right);

    GiSTentry *leftKey = (*node)->Union();
    GiSTentry *rightKey = right->Union();
    leftKey->SetPtr((*node)->Path().Page());
    rightKey->SetPtr(right->Path().Page());

    if (splittingRoot) {
        // Create new root if root is being split
        GiSTnode *freshRoot = NewNode (this);
        freshRoot->SetLevel((*node)->Level() + 1);
        freshRoot->InsertBefore(*leftKey, 0);
        freshRoot->InsertBefore(*rightKey, 1);
        freshRoot->Path().MakeRoot();
        WriteNode (freshRoot);
        delete freshRoot;
    } else {
        // Insert entry for N' in parent
        GiSTpath upPath = (*node)->Path();
        upPath.MakeParent ();
        GiSTnode *upNode = ReadNode (upPath);

        // Find the entry for N in parent
        GiSTentry *oldLink = upNode->SearchPtr((*node)->Path().Page());
        assert (oldLink != NULL);

        // Replace it with the two new entries, N' right after N
        int slot = oldLink->Position();
        upNode->DeleteEntry(slot);
        upNode->InsertBefore(*leftKey, slot);
        upNode->InsertBefore(*rightKey, slot+1);
        delete oldLink;

        if (upNode->IsOverFull(*store)) {
            Split (&upNode, keptInLeft ? *leftKey : *rightKey);

            // parent's path may have changed: rebuild both children's paths
            GiSTpage page = (*node)->Path().Page();
            (*node)->Path() = upNode->Path();
            (*node)->Path().MakeChild (page);
            page = right->Path().Page();
            right->Path() = (*node)->Path();
            right->Path().MakeSibling (page);
        } else {
            WriteNode (upNode);
        }
        delete upNode;
    }

    if (keptInLeft) {
        delete right;
    } else {
        delete *node;
        *node = right;  // return it
    }
    delete leftKey;
    delete rightKey;
}