// Return copies of every entry stored in this node that is consistent with
// the given predicate.  Each element of the result is a fresh Copy() of the
// matching entry, in slot order.
GiSTlist<GiSTentry*> GiSTnode::Search(const GiSTpredicate &query) const
{
    GiSTlist<GiSTentry*> matches;

    int slot = 0;
    while (slot < numEntries) {
        GiSTentry *candidate = (*this)[slot];
        ++slot;
        if (!query.Consistent(*candidate)) {
            continue;  // predicate rejects this entry
        }
        matches.Append((GiSTentry*)candidate->Copy());
    }
    return matches;
}
// Default removal policy: take the first entries on the page.
// The number removed is (NumEntries()+1) * RemoveRatio(), rounded to nearest.
// Copies of the removed entries are returned; the originals are deleted from
// the node.
GiSTlist<GiSTentry*> GiST::RemoveTop(GiSTnode *node)
{
    GiSTlist<GiSTentry*> removed;
    const int total = node->NumEntries();
    const int toRemove = (int)((total + 1) * RemoveRatio() + 0.5);

    // Visit slots toRemove-1 .. 0, highest first (same order as before).
    int slot = toRemove;
    while (slot-- > 0) {
        removed.Append((GiSTentry *)(*node)[slot].Ptr()->Copy());
        node->DeleteEntry(slot);
    }
    return removed;
}
// Forced-reinsert handling of an overflowing node: strip entries off the node
// with RemoveTop(), persist the node and adjust keys, flag the node's level in
// splitvec, then push every stripped entry back through InsertHelper() at
// that level.
void GiST::OverflowTreatment(GiSTnode *node, const GiSTentry& entry, int *splitvec)
{
    // Remove the "top" entries from the node and write the slimmer node out.
    GiSTlist<GiSTentry*> orphans = RemoveTop(node);
    WriteNode(node);
    AdjustKeys(node, NULL);

    // Record that this level has already been treated.
    splitvec[node->Level()] = 1;

    // Re-insert each removed entry at this level.
    for (;;) {
        if (orphans.IsEmpty()) {
            break;
        }
        GiSTentry *orphan = orphans.RemoveFront();
        InsertHelper(*orphan, node->Level(), splitvec);
        delete orphan;
    }
}
// split this M-tree into a list of trees having height level, which is used in the "splitting" phase of the BulkLoad algorithm // nCreated is the number of created subtrees, // level is the split level for the tree, // children is the list of the parents of each subtree, // name is the root for the subtrees names // the return value is the list of splitted subtrees's names GiSTlist<char *> * MT::SplitTree (int *nCreated, int level, GiSTlist<MTentry *> *parentEntries, const char *name) { GiSTlist<MTnode *> *oldList = new GiSTlist<MTnode *>; // upper level nodes MTnode *node = new MTnode; // this is because the first operation on node is a delete GiSTpath path; path.MakeRoot (); oldList->Append((MTnode *) ReadNode(path)); // insert the root do { // build the roots list GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; // lower level nodes while (!oldList->IsEmpty()) { delete node; // delete the old node created by ReadNode node = oldList->RemoveFront(); // retrieve next node to be examined path = node->Path(); for (int i=0; i<node->NumEntries(); i++) { // append all its children to the new list path.MakeChild ((*node)[i].Ptr()->Ptr()); newList->Append((MTnode *)ReadNode(path)); path.MakeParent (); } } delete oldList; oldList = newList; } while (node->Level() > level); // stop if we're at the split level delete node; GiSTlist<char *> *newTreeNames = new GiSTlist<char *>; // this is the results list while (!oldList->IsEmpty()) { // now append each sub-tree to its root char newName[50]; sprintf (newName, "%s.%i", name, ++(*nCreated)); unlink (newName); // if this M-tree already exists, delete it MT *newTree = new MT; newTree->Create(newName); // create a new M-tree path.MakeRoot (); MTnode *rootNode = (MTnode *) newTree->ReadNode(path); // read the root of the new tree node = oldList->RemoveFront(); newTree->Append(rootNode, (MTnode *)node->Copy()); // append the current node to the root of new tree parentEntries->Append(node->ParentEntry()); // insert the original 
parent entry into the list newTreeNames->Append(strdup(newName)); // insert the new M-tree name into the list delete node; delete rootNode; delete newTree; } delete oldList; return newTreeNames; }
// append the subtree rooted at from to the node to, which is used in the "append" phase of the BulkLoad algorithm void MT::Append (MTnode *to, MTnode *from) { GiSTlist<MTnode *> *oldList = new GiSTlist<MTnode *>; // upper level nodes to append oldList->Append(from); GiSTlist<GiSTpath> pathList; pathList.Append (to->Path()); MTnode *node = new MTnode, *newNode = NULL; MT *fromTree = (MT *) from->Tree(); do { GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; // lower level nodes to append while (!oldList->IsEmpty()) { delete node; node = oldList->RemoveFront(); GiSTpath path = pathList.RemoveFront (); newNode = (MTnode *) ReadNode (path); // node to be appended for (int i=0; i<node->NumEntries(); i++) { MTentry *entry = (MTentry *) (*node)[i].Ptr()->Copy(); if (node->Level() > 0) { // if node isn't a leaf, we've to allocate its children GiSTpath nodePath = node->Path(); nodePath.MakeChild (entry->Ptr()); newList->Append((MTnode *) fromTree->ReadNode(nodePath)); entry->SetPtr(Store()->Allocate()); // allocate its child in the inserted tree path.MakeChild (entry->Ptr()); MTnode *childNode = (MTnode *) CreateNode (); childNode->Path() = path; childNode->SetTree(this); WriteNode (childNode); // write the empty node delete childNode; pathList.Append (path); path.MakeParent (); } newNode->Insert(*entry); delete entry; } newNode->SetLevel(node->Level()); WriteNode (newNode); // write the node delete newNode; } delete oldList; oldList = newList; } while (node->Level() > 0); // until we reach the leaves' level delete node; delete oldList; }
void MT::CollectStats () { GiSTpath path; path.MakeRoot (); GiSTnode *node = ReadNode (path); if (!node->IsLeaf()) { int maxLevel = node->Level(); double *radii = new double[maxLevel]; int *pages = new int[maxLevel]; for (int i=0; i<maxLevel; i++) { pages[i] = 0; radii[i] = 0; } TruePredicate truePredicate; GiSTlist<GiSTentry*> list = node->Search(truePredicate); // retrieve all the entries in this node double overlap = ((MTnode *)node)->Overlap(); double totalOverlap = overlap; delete node; while (!list.IsEmpty()) { GiSTentry *entry = list.RemoveFront (); path.MakeChild (entry->Ptr()); node = ReadNode (path); overlap = ((MTnode *)node)->Overlap(); totalOverlap += overlap; pages[node->Level()]++; radii[node->Level()] += ((MTkey *) entry->Key())->MaxRadius(); GiSTlist<GiSTentry*> newlist; if (!node->IsLeaf()) { newlist = node->Search(truePredicate); // recurse to next level } while (!newlist.IsEmpty()) { list.Append (newlist.RemoveFront ()); } path.MakeParent (); delete entry; delete node; } // output the results cout << "Level:\tPages:\tAverage_Radius:"<<endl; int totalPages = 1; // for the root for (int i=maxLevel-1; i>=0; i--) { totalPages += pages[i]; cout << i << ":\t" << pages[i] << "\t" << radii[i]/pages[i] << endl; } cout << "TotalPages:\t" << totalPages << endl; cout << "LeafPages:\t" << pages[0] << endl; cout << "TotalOverlap:\t" << (float)totalOverlap << endl; delete []radii; delete []pages; } else { delete node; } }
// Return the next leaf entry matching the cursor's query, or NULL once the
// search is exhausted.  Pending entries are kept on `stack`; non-leaf entries
// popped from it are expanded by reading the node they point to and pushing
// that node's matching entries.
GiSTentry* GiSTcursor::Next()
{
    GiSTpage page;

    for (;;) {
        if (!first && stack.IsEmpty()) {
            return NULL;  // the search is over
        }

        if (first) {
            page = GiSTRootPage;
            first = 0;
        } else {
            assert(lastlevel >= 0);
            GiSTentry *top = stack.RemoveFront();
            if (top->IsLeaf()) {
                return top;
            }

            // Climb back up the path to the level this entry was found on.
            const int climb = top->Level() - lastlevel;
            for (int step = 0; step < climb; step++) {
                path.MakeParent();
            }
            page = top->Ptr();
            delete top;
        }

        // The entry was a pointer to another node: descend into it.
        path.MakeChild(page);
        GiSTnode *node = gist.ReadNode(path);
        lastlevel = node->Level();

        // Push the matches so that the first match ends up on top of the stack.
        GiSTlist<GiSTentry*> matches = node->Search(*query);
        while (!matches.IsEmpty()) {
            stack.Prepend(matches.RemoveRear());
        }
        delete node;
    }
}
// Print the node at `path` to `os` and then, for non-leaf nodes, recursively
// print every child in entry order.
void GiST::DumpNode (ostream& os, GiSTpath path) const
{
    GiSTnode *current = ReadNode(path);
    current->Print(os);

    if (current->IsLeaf()) {
        delete current;
        return;
    }

    TruePredicate everything;
    GiSTlist<GiSTentry*> children = current->Search(everything);
    for (; !children.IsEmpty(); ) {
        GiSTentry *child = children.RemoveFront();
        path.MakeChild(child->Ptr());
        DumpNode (os, path);
        path.MakeParent();
        delete child;
    }
    delete current;
}
// R-tree removal policy for overflow treatment.
// Entries are ordered with GiSTdistixcmp on the value returned by
// bbox().dist() against the node's union box, and the first
// (NumEntries()+1)*RemoveRatio() of that ordering (rounded to nearest,
// clamped to the number of entries) are removed from the node.
// Returns newly allocated copies of the removed entries.
GiSTlist<GiSTentry*> RT::RemoveTop(GiSTnode *node)
{
    GiSTlist<GiSTentry*> deleted;
    const int count = node->NumEntries();
    int num_rem = (int)((count + 1)*RemoveRatio() + 0.5);

    // Never ask for more entries than the node holds (or a negative amount):
    // both arrays below are indexed by this value.
    if (num_rem > count) num_rem = count;
    if (num_rem < 0) num_rem = 0;

    distix *dvec = new distix[count];
    int *ivec = new int[num_rem];
    RTentry *uentry = (RTentry *)(node->Union());
    RTkey tmpbox;
    int i;

    // compute distance of each entry to the bounding box of the node,
    // and sort according to GiSTdistixcmp
    for (i = 0; i < count; i++) {
        dvec[i].ix = i;
        tmpbox = ((RTentry *)((*node)[i].Ptr()))->bbox();
        dvec[i].dist = tmpbox.dist(uentry->bbox());
    }
    delete uentry;
    qsort(dvec, count, sizeof(distix), GiSTdistixcmp);

    for (i = 0; i < num_rem; i++)
        ivec[i] = dvec[i].ix;
    delete []dvec;  // allocated with new[]

    // sort the chosen indices so they can be removed from the highest down
    qsort(ivec, num_rem, sizeof(int), GiSTintcmp);
    for (i = num_rem - 1; i >= 0; i--) {
        RTentry *tmpentry = new RTentry(*(RTentry *)((*node)[ivec[i]].Ptr()));
        deleted.Append(tmpentry);
        node->DeleteEntry(ivec[i]);
    }
    delete []ivec;  // allocated with new[]

    return(deleted);
}
int main() { MXTree *tree = new MXTree; tree->Open(MXTreePath.c_str()); assert(tree->IsOpen()); time_t time_start, time_end; time(&time_start); ifstream fin(path.c_str()); for (int i=0; i<amount/1000; i++) { Object *obj = Read(fin, rand()%amount); Pred *pred = new Pred(*obj); delete obj; SimpleQuery query(pred, 40); delete pred; GiSTlist<MTentry *> list = tree->RangeSearch(query); while (!list.IsEmpty()) { MTentry *e = list.RemoveFront(); ++objs; delete e; } Progress(i, 1200); } fin.close(); time(&time_end); cout<<difftime(time_end, time_start)<<endl; delete tree; cout << "Computed dists = " << compdists << endl; cout << "IO reads = " << IOread << endl; cout << "IO writes = " << IOwrite << endl; cout << "Objs = " << objs << endl; return 0; }
// Range search below this node.
// Leaf: returns copies of the entries the query is consistent with, each
// stamped with the query's grade via setmaxradius().
// Internal node: recurses into every child whose entry the query is
// consistent with and concatenates the children's results.
// A private copy of the query is made for every entry examined.
GiSTlist<MTentry *> MTnode::RangeSearch(const MTquery &query)
{
    GiSTlist<MTentry *> hits;

    if (IsLeaf()) {
        int slot = 0;
        while (slot < NumEntries()) {
            MTentry *candidate = (MTentry *)(*this)[slot].Ptr()->Copy();
            MTquery *probe = (MTquery *)query.Copy();

            if (!probe->Consistent(*candidate)) {
                delete candidate;
            } else {
                // object qualifies
                candidate->setmaxradius(probe->Grade());
                hits.Append(candidate);
            }
            delete probe;
            ++slot;
        }
        return hits;
    }

    int slot = 0;
    while (slot < NumEntries()) {
        MTentry *router = (MTentry *)(*this)[slot].Ptr();
        MTquery *probe = (MTquery *)query.Copy();

        if (probe->Consistent(*router)) {
            // sub-tree not excluded: read the child and recurse the search
            GiSTpath childpath = Path();
            childpath.MakeChild(router->Ptr());
            MTnode *child = (MTnode *)((MT *)Tree())->ReadNode(childpath);

            GiSTlist<MTentry *> found = child->RangeSearch(*probe);
            while (!found.IsEmpty()) {
                hits.Append(found.RemoveFront());
            }
            delete child;
        }
        delete probe;
        ++slot;
    }
    return hits;
}
// Range search in the sub-tree rooted at this node.
// For an internal node every child whose routing entry is consistent with the
// query is read and searched recursively; for a leaf, a copy of each
// consistent entry is returned with its max radius set to the query's grade.
// The query is copied afresh for each entry that is tested.
GiSTlist<MTentry *> MTnode::RangeSearch (const MTquery &query)
{
    GiSTlist<MTentry *> collected;

    if (!IsLeaf()) {
        for (int pos=0; pos<NumEntries(); pos++) {
            MTentry *routing = (MTentry *) (*this)[pos].Ptr();
            MTquery *scratch = (MTquery *) query.Copy();

            if (newQueryAccepts:
                false) {}
            delete scratch;
        }
        return collected;
    }
    return collected;
}
int main(int argc, char **argv) { // std::cerr << "Now starting...\n"; // malloc_stats(); int i=1; char cmdLine[15]; BOOL end=FALSE; compdists=IOread=IOwrite=objs=0; std::cout << "** MTree: An M-Tree based on Generalized Search Trees\n"; while(strcmp(cmdLine, "quit")) { scanf("%s", cmdLine); if(!strcmp(cmdLine, "drop")) { CommandDrop("graphs.M3"); if(argc<5) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] [sec_promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } MIN_UTIL=atof(argv[1]); SPLIT_FUNCTION=(s_function)atoi(argv[2]); PROMOTE_PART_FUNCTION=(pp_function)atoi(argv[3]); SECONDARY_PART_FUNCTION=(pp_function)atoi(argv[4]); if(SECONDARY_PART_FUNCTION==CONFIRMED) { std::cout << "The secondary promotion function must be an unconfirmed one\n"; exit(-1); } if(PROMOTE_PART_FUNCTION==SAMPLING) { if(argc<6) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } NUM_CANDIDATES=atoi(argv[5]); } if(PROMOTE_PART_FUNCTION==CONFIRMED) { if(argc<6) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] [sec_promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } PROMOTE_VOTE_FUNCTION=(pv_function)atoi(argv[5]); if(PROMOTE_VOTE_FUNCTION==SAMPLINGV) { if(argc<7) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } NUM_CANDIDATES=atoi(argv[6]); } else if(PROMOTE_VOTE_FUNCTION==mM_RAD) { if(argc<7) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } RADIUS_FUNCTION=(r_function)atoi(argv[6]); } } switch(SPLIT_FUNCTION) { case G_HYPERPL: std::cout << "G_HYPL, "; break; case BAL_G_HYPERPL: std::cout << "BAL_G_HYPL, "; break; case BALANCED: std::cout << "BAL, "; break; } switch(PROMOTE_PART_FUNCTION) { case RANDOM: std::cout << "RAN_2 "; break; case MAX_UB_DIST: std::cout << "M_UB_d "; break; case SAMPLING: std::cout << "SAMP" << NUM_CANDIDATES << "_2 "; break; case 
MIN_RAD: std::cout << "m_R_2 "; break; case MIN_OVERLAPS: std::cout << "m_O_2 "; break; case CONFIRMED: switch(PROMOTE_VOTE_FUNCTION) { case RANDOMV: std::cout << "RAN_1 "; break; case SAMPLINGV: std::cout << "SAMP" << NUM_CANDIDATES << "_1 "; break; case MAX_LB_DIST: std::cout << "M_LB_d "; break; case mM_RAD: std::cout << "mM_"; switch(RADIUS_FUNCTION) { case LB: std::cout << "m"; break; case AVG: std::cout << "A"; break; case UB: std::cout << "M"; break; } std::cout << "_r "; break; } break; } switch(SECONDARY_PART_FUNCTION) { case RANDOM: std::cout << "(RAN_2)\n"; break; case MAX_UB_DIST: std::cout << "(M_UB_d)\n"; break; case SAMPLING: std::cout << "(SAMP" << NUM_CANDIDATES << "_2)\n"; break; case MIN_RAD: std::cout << "(m_R_2)\n"; break; case MIN_OVERLAPS: std::cout << "(m_O_2)\n"; break; } CommandCreate("mtree", "graphs.M3"); } else if(!strcmp(cmdLine, "select")) { MTobject *obj=Read(); Pred *pred=new Pred(*obj); double r; scanf("%s", cmdLine); r=atof(cmdLine); SimpleQuery query(pred, r); delete obj; delete pred; if(!gist) CommandOpen("mtree", "graphs.M3"); CommandSelect(query); CommandClose(); } else if((!strcmp(cmdLine, "nearest"))||(!strcmp(cmdLine, "farthest"))) { int k; BOOL nearest=strcmp(cmdLine, "farthest"); MTpred *pred; MTobject *obj=Read(); scanf("%s", cmdLine); k=atoi(cmdLine); if(nearest) pred=new Pred(*obj); else { MTpred *npred=new Pred(*obj); pred=new NotPred(npred); delete npred; } // eps=atof(argv[1]); TopQuery query(pred, k); delete pred; if(!gist) CommandOpen("mtree", "graphs.M3"); CommandNearest(query); CommandClose(); delete obj; } else if(!strcmp(cmdLine, "cursor")) { MTobject *obj=Read(); Pred pred(*obj); if(!gist) CommandOpen("mtree", "graphs.M3"); MTcursor cursor(*gist, pred); scanf("%s", cmdLine); while(strcmp(cmdLine, "close")) { if(!strcmp(cmdLine, "next")) { int k; GiSTlist<MTentry *> list; scanf("%s", cmdLine); k=atoi(cmdLine); // std::cout << "Fetching next " << k << " entries...\n"; for(; k>0; k--) list.Append(cursor.Next()); 
while(!list.IsEmpty()) { MTentry *e=list.RemoveFront(); // std::cout << e; delete e; objs++; } } scanf("%s", cmdLine); } delete obj; CommandClose(); } /* else if(!strcmp(cmdLine, "find")) { int n, k, l, oldcompdists, oldIOread, oldobjs; scanf("%s", cmdLine); n=atoi(cmdLine); double **x=(double **)calloc(n, sizeof(double *)); for(i=0; i<n; i++) x[i]=(double *)calloc(dimension, sizeof(double)); MTpred **p=(MTpred **)calloc(n, sizeof(MTpred *)); AndPred **ap=(AndPred **)calloc(n-1, sizeof(AndPred *)); for(i=0; i<n; i++) { for(int j=0; j<dimension; j++) { scanf("%s", cmdLine); x[i][j]=atof(cmdLine); } if(x[i][0]>=0) { MTobject obj(x[i]); // std::cout << "obj=" << obj << std::endl; p[i]=new Pred(obj); } else { x[i][0]=-x[i][0]; MTobject obj(x[i]); // std::cout << "obj=" << obj << std::endl; Pred *pr=new Pred(obj); p[i]=new NotPred(pr); delete pr; } // std::cout << "pred=" << *p[i] << std::endl; } if(n==2) std::cout << "d=" << p[1]->distance(((Pred *)p[0])->obj()) << std::endl; ap[0]=new AndPred(p[0], p[1]); for(i=1; i<n-1; i++) ap[i]=new AndPred(ap[i-1], p[i+1]); // std::cout << "Query: " << *ap[n-2] << std::endl; scanf("%s", cmdLine); k=atoi(cmdLine); compdists=IOread=IOwrite=0; TopQuery q(ap[n-2], k); if(!gist) CommandOpen("mtree", "graphs.M3"); CommandNearest(q); std::cout << "Computed dists=" << compdists << "\nIO reads=" << IOread << "\nIO writes=" << IOwrite << "\nObjs=" << objs << std::endl; BOOL (*obs)[IndObjs]=new BOOL [n][IndObjs], pass=FALSE; l=-90; do { int j; for(j=0; j<IndObjs; j++) for(i=0; i<n; i++) obs[i][j]=FALSE; compdists=IOread=IOwrite=objs=0; l+=100; for(i=0; i<n; i++) { TopQuery qi(p[i], l); GiSTlist<GiSTobject *> list=gist->TopSearch(qi); while(!list.IsEmpty()) { MTentry *e=(MTentry *)list.RemoveFront(); obs[i][e->Ptr()]=TRUE; delete e; } } for(j=0; j<IndObjs; j++) { BOOL check=TRUE; for(i=0; (i<n)&✓ i++) check=obs[i][j]; if(check) objs++; } // std::cout << l << "=>" << objs << std::endl; if(objs>k) { pass=TRUE; l-=110; oldcompdists=compdists; 
oldIOread=IOread; oldobjs=objs; } if(!pass) { oldcompdists=compdists; oldIOread=IOread; oldobjs=objs; } // else if(objs==0) l+=90; // dangerous: could lead to infinite loops... } while(((objs<k)&&!pass)||((objs>k)&&pass)); std::cout << l << "=>" << objs << std::endl; if(objs<k) std::cout << "Computed dists=" << oldcompdists << "\nIO reads=" << oldIOread << "\nObjs=" << oldobjs << std::endl; else std::cout << "Computed dists=" << compdists << "\nIO reads=" << IOread << "\nObjs=" << objs << std::endl; delete []obs; for(i=0; i<n; i++) delete x[i]; free(x); for(i=0; i<n; i++) delete p[i]; free(p); for(i=0; i<n-1; i++) delete ap[i]; free(ap); compdists=IOread=IOwrite=objs=0; CommandClose(); } */ else if(!strcmp(cmdLine, "check")) { if(!gist) CommandOpen("mtree", "graphs.M3"); CommandCheck(); CommandClose(); } else if(!strcmp(cmdLine, "dump")) { if(!gist) CommandOpen("mtree", "graphs.M3"); CommandDump(); CommandClose(); } else if(!strcmp(cmdLine, "stats")) { if(!gist) CommandOpen("mtree", "graphs.M3"); CommandStats(); CommandClose(); } else if(!strcmp(cmdLine, "add")) { if(!gist) CommandOpen("mtree", "graphs.M3"); scanf("%s", cmdLine); i=atoi(cmdLine); if(argc<5) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] [sec_promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } MIN_UTIL=atof(argv[1]); SPLIT_FUNCTION=(s_function)atoi(argv[2]); PROMOTE_PART_FUNCTION=(pp_function)atoi(argv[3]); SECONDARY_PART_FUNCTION=(pp_function)atoi(argv[4]); if(SECONDARY_PART_FUNCTION==CONFIRMED) { std::cout << "The secondary promotion function must be an unconfirmed one\n"; exit(-1); } if(PROMOTE_PART_FUNCTION==SAMPLING) { if(argc<6) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } NUM_CANDIDATES=atoi(argv[5]); } if(PROMOTE_PART_FUNCTION==CONFIRMED) { if(argc<6) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] [sec_promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } 
PROMOTE_VOTE_FUNCTION=(pv_function)atoi(argv[5]); if(PROMOTE_VOTE_FUNCTION==SAMPLINGV) { if(argc<7) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } NUM_CANDIDATES=atoi(argv[6]); } else if(PROMOTE_VOTE_FUNCTION==mM_RAD) { if(argc<7) { std::cout << "Usage is: MTree [min_util] [split_f] [promote_f] ([vote_f] ([n_cand]|[radius_f]))\n"; exit(-1); } RADIUS_FUNCTION=(r_function)atoi(argv[6]); } } switch(SPLIT_FUNCTION) { case G_HYPERPL: std::cout << "G_HYPL, "; break; case BAL_G_HYPERPL: std::cout << "BAL_G_HYPL, "; break; case BALANCED: std::cout << "BAL, "; break; } switch(PROMOTE_PART_FUNCTION) { case RANDOM: std::cout << "RAN_2 "; break; case MAX_UB_DIST: std::cout << "M_UB_d "; break; case SAMPLING: std::cout << "SAMP" << NUM_CANDIDATES << "_2 "; break; case MIN_RAD: std::cout << "m_R_2 "; break; case MIN_OVERLAPS: std::cout << "m_O_2 "; break; case CONFIRMED: switch(PROMOTE_VOTE_FUNCTION) { case RANDOMV: std::cout << "RAN_1 "; break; case SAMPLINGV: std::cout << "SAMP" << NUM_CANDIDATES << "_1 "; break; case MAX_LB_DIST: std::cout << "M_LB_d "; break; case mM_RAD: std::cout << "mM_"; switch(RADIUS_FUNCTION) { case LB: std::cout << "m"; break; case AVG: std::cout << "A"; break; case UB: std::cout << "M"; break; } std::cout << "_r "; break; } break; } switch(SECONDARY_PART_FUNCTION) { case RANDOM: std::cout << "(RAN_2)\n"; break; case MAX_UB_DIST: std::cout << "(M_UB_d)\n"; break; case SAMPLING: std::cout << "(SAMP" << NUM_CANDIDATES << "_2)\n"; break; case MIN_RAD: std::cout << "(m_R_2)\n"; break; case MIN_OVERLAPS: std::cout << "(m_O_2)\n"; break; } } else if(!strcmp(cmdLine, "insert")) { MTobject *obj=Read(); if(!gist) CommandOpen("mtree", "graphs.M3"); CommandInsert(MTkey(*obj, 0, 0), i++); delete obj; } else if(!strcmp(cmdLine, "load")) { MTentry **entries; int n; if(argc<2) { std::cout << "Usage is: MTree [min_util]\n"; exit(-1); } MIN_UTIL=atof(argv[1]); i=0; scanf("%s", cmdLine); 
n=atoi(cmdLine); entries=new MTentry*[n]; for(i=0; i<n; i++) { MTobject *obj=Read(); entries[i]=new MTentry(MTkey(*obj, 0, 0), i); delete obj; } CommandLoad("graphs.M3", entries, n); for(i=0; i<n; i++) delete entries[i]; delete []entries; } } std::cout << "Computed dists=" << compdists << "\nIO reads=" << IOread << "\nIO writes=" << IOwrite << "\nObjs=" << objs << std::endl; CommandQuit(); // std::cerr << "Now exiting...\n"; // malloc_stats(); }
// handle underfull leaf nodes int GiST::CondenseTree(GiSTnode *node) { GiSTlist<GiSTentry*> Q; int deleted=0; // Must be condensing a leaf assert(node->IsLeaf()); while(!node->Path().IsRoot()) { GiSTpath parent_path=node->Path(); parent_path.MakeParent(); GiSTnode *P=ReadNode(parent_path); GiSTentry *En=P->SearchPtr(node->Path().Page()); assert(En!=NULL); // Handle under-full node if(node->IsUnderFull(*store)) { if(!IsOrdered()) { TruePredicate truePredicate; GiSTlist<GiSTentry*> list=node->Search(truePredicate); while(!list.IsEmpty()) { GiSTentry *e=list.RemoveFront(); Q.Append(e); } P->DeleteEntry(En->Position()); WriteNode(P); deleted=1; AdjustKeys(P, NULL); } else { // Try to borrow entries, else coalesce with a neighbor // Have to look at left sibling??? GiSTpage neighbor_page=P->SearchNeighbors(node->Path().Page()); GiSTpath neighbor_path=node->Path(); neighbor_path.MakeSibling(neighbor_page); if(neighbor_page!=0) { GiSTnode *neighbor; // If neighbor is RIGHT sibling... if(node->Sibling()==neighbor_page) neighbor=ReadNode(neighbor_path); else { neighbor=node; node=ReadNode(neighbor_path); } GiSTentry *e=P->SearchPtr(node->Path().Page()); node->Coalesce(*neighbor, *e); delete e; // If not overfull, coalesce, kill right node if(!node->IsOverFull(*store)) { node->SetSibling(neighbor->Sibling()); WriteNode(node); // Delete the neighbor from parent GiSTentry *e=P->SearchPtr(neighbor->Path().Page()); P->DeleteEntry(e->Position()); WriteNode(P); delete e; store->Deallocate(neighbor->Path().Page()); deleted=1; } // If overfull, split (same as borrowing) else { GiSTnode *node2=node->PickSplit(); node2->Path()=neighbor->Path(); node2->SetSibling(neighbor->Sibling()); WriteNode(node); WriteNode(node2); AdjustKeys(node2, &P); delete node2; deleted=1; } delete neighbor; } } } // Adjust covering predicate if(!deleted) AdjustKeys(node, &P); parent_path=node->Path(); parent_path.MakeParent(); delete node; // Propagate deletes if(!deleted) break; node=P; } // Re-insert 
orphaned entries while(!Q.IsEmpty()) { GiSTentry *e=Q.RemoveFront(); InsertHelper(*e, e->Level()); delete e; } return(deleted); }
// load this M-tree with n data using the BulkLoad algorithm [CP98] // data is an array of n entries // padFactor is the maximum node utilization (use 1) // name is the name of the tree void MT::BulkLoad (MTentry **data, int n, double padFactor, const char *name) { int size = 0; if (EntrySize()) { size = n * (sizeof(GiSTpage) + EntrySize()); // (only valid if we've fixed size entries) } else { for (int i=0; i<n; i++) { size += sizeof(GiSTlte) + sizeof(GiSTpage) + data[i]->CompressedLength(); } } int totSize = size + GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte); if (totSize > Store()->PageSize()) { // we need to split the entries into several sub-trees int numEntries = (int)(Store()->PageSize()*padFactor*n) / totSize; int s = (int) MAX (MIN (numEntries, ceil(((float)n)/numEntries)), numEntries*MIN_UTIL); // initial number of samples int nSamples, *samples = new int[s], *sizes = NULL, *ns = NULL, iter = 0, MAXITER = s * s; GiSTlist<double *> *distm = (GiSTlist<double *> *) calloc (s, sizeof(GiSTlist<double *>)); // relative distances between samples int MINSIZE = (int) (Store()->PageSize()*MIN_UTIL), addEntrySize = EntrySize() ? sizeof(GiSTpage) : sizeof(GiSTlte)+sizeof(GiSTpage); GiSTlist<int> *lists = NULL; // set for each sample set GiSTlist<double> *dists = NULL; // set for distance between each sample and its members BOOL *bSampled = new BOOL[n]; // is this entry in the samples set? 
// sampling phase do { iter++; if (iter > 1) { // this is a new sampling phase while (!lists[0].IsEmpty()) { lists[0].RemoveFront (); dists[0].RemoveFront (); } delete []lists; delete []dists; delete []sizes; delete []ns; while (!distm[0].IsEmpty()) { delete []distm[0].RemoveFront(); // empty the distance list } for (int i=1; i<s; i++) { distm[i].front = distm[i].rear = NULL; } } if (iter >= MAXITER) { cout << "Too many loops in BulkLoad!"<<endl<<"Please select a lower minimum node utilization or a bigger node size."<<endl; exit(1); } for (int i=0; i<n; i++) { bSampled[i] = FALSE; } nSamples = 0; // pick s samples to create parents while (nSamples < s) { int i; do { i = PickRandom (0, n); } while (bSampled[i]); bSampled[i] = TRUE; samples[nSamples++] = i; } lists = new GiSTlist<int>[s]; dists = new GiSTlist<double>[s]; sizes = new int[s]; ns = new int[s]; for (int i=0; i<s; i++) { sizes[i] = GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte); ns[i] = 1; distm[i].Prepend (new double[s]); } // compute the relative distances between samples for (int i=0; i<s; i++) { for (int j=0; j<i; j++) { distm[j].front->entry[i] = distm[i].front->entry[j] = data[samples[j]]->object().distance(data[samples[i]]->object()); } distm[i].front->entry[i] = 0; } // assign each entry to its nearest parent for (int i=0; i<n; i++) { if (bSampled[i]) { int j = 0; for (; samples[j]!=i; j++); // find this entry in the samples set and return position in it lists[j].Prepend (i); // insert the entry in the right sample dists[j].Prepend (0); // distance between sample and data[i] sizes[j] += addEntrySize + data[i]->CompressedLength(); } else { // here we optimize the distance computations (like we do in the insert algorithm) double *dist = new double[s]; // distance between this non-sample and samples dist[0] = data[samples[0]]->object().distance(data[i]->object()); int minIndex = 0; for (int j=1; j<s; j++) { // seek the nearest sample dist[j] = -MaxDist(); if (fabs (data[samples[j]]->Key()->distance - 
data[i]->Key()->distance) >= dist[minIndex]) { // pruning continue; } BOOL flag = TRUE; for (int k=0; k<j && flag; k++) { // pruning (other samples) if (dist[k] < 0) { continue; } else { flag = fabs (dist[k] - distm[j].front->entry[k]) < dist[minIndex]; } } if (!flag) { continue; } dist[j] = data[samples[j]]->object().distance(data[i]->object()); // have to compute this distance if (dist[j] < dist[minIndex]) { minIndex = j; } } lists[minIndex].Append (i); // insert the entry in the right sample dists[minIndex].Append (dist[minIndex]); // distance between sample and data[i] sizes[minIndex] += addEntrySize + data[i]->CompressedLength(); ns[minIndex]++; sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist); // correspond with lists } } // redistribute underfilled parents int i; while (sizes[i = FindMin (sizes, nSamples)] < MINSIZE) { GiSTlist<int> list = lists[i]; // each sample set while (!dists[i].IsEmpty()) { // clear distance between each sample and its members dists[i].RemoveFront (); } // substitute this set with last set for (int j=0; j<nSamples; j++) { for (GiSTlistnode<double *> *node=distm[j].front; node; node=node->next) { node->entry[i] = node->entry[nSamples-1]; } } GiSTlist<double *> dlist = distm[i]; // relative distances between sample[i] and other samples, reposition by myself distm[i] = distm[nSamples-1]; lists[i] = lists[nSamples-1]; dists[i] = dists[nSamples-1]; samples[i] = samples[nSamples-1]; sizes[i] = sizes[nSamples-1]; ns[i] = ns[nSamples-1]; nSamples--; while (!list.IsEmpty()) { // assign each entry to its nearest parent double *dist = dlist.RemoveFront (); // relative distances between sample[i] (old) and other samples (old) int minIndex = -1; for (int j=0; j<nSamples && minIndex<0; j++) { // search for a computed distance if (dist[j] > 0) { minIndex = j; } } int k = list.RemoveFront (); if (minIndex < 0) { // no distance was computed (i.e. 
all distances were pruned) dist[0] = data[samples[0]]->object().distance(data[k]->object()); minIndex = 0; } for (int j=0; j<nSamples; j++) { if (j == minIndex) { continue; } if (dist[j] < 0) { // distance wasn't computed if (fabs (data[samples[j]]->Key()->distance - data[k]->Key()->distance) >= dist[minIndex]) { continue; // pruning } BOOL flag = TRUE; for (int i=0; i<j && flag; i++) { // pruning (other samples) if (dist[i] < 0) { continue; } else { flag = fabs (dist[i] - distm[j].front->entry[i]) < dist[minIndex]; } } if (!flag) { continue; } dist[j] = data[samples[j]]->object().distance(data[k]->object()); // have to compute this distance } if (dist[j] < dist[minIndex]) { minIndex = j; } } lists[minIndex].Append (k); dists[minIndex].Append (dist[minIndex]); sizes[minIndex] += addEntrySize + data[k]->CompressedLength(); ns[minIndex]++; sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist); // correspond with lists } assert (dlist.IsEmpty()); // so is the list } } while (nSamples == 1); // if there's only one child, repeat the sampling phase MTentry ***array = new MTentry **[nSamples]; // array of the entries for each sub-tree for (int i=0; i<nSamples; i++) { // convert the lists into arrays array[i] = new MTentry *[ns[i]]; for (int j=0; j<ns[i]; j++) { array[i][j] = (MTentry *) data[lists[i].RemoveFront ()]->Copy(); array[i][j]->Key()->distance = dists[i].RemoveFront (); } assert (lists[i].IsEmpty()); assert (dists[i].IsEmpty()); } delete []lists; delete []dists; delete []sizes; delete []bSampled; for (int i=0; i<nSamples; i++) { while (!distm[i].IsEmpty()) { delete [](distm[i].RemoveFront()); } } free (distm); // build an M-tree under each parent int nInit = nSamples; MT *subtree = new MT; GiSTlist<char *> subtreeNames; // list of the subtrees names GiSTlist<MTentry *> topEntries; // list of the parent entries of each subtree int nCreated = 0, minHeight = MAXINT; char newName[50]; for (int i=0; i<nInit; i++) { sprintf (newName, "%s.%i", 
name, ++nCreated); unlink (newName); subtree->Create(newName); // create the new subtree subtree->BulkLoad(array[i], ns[i], padFactor, newName); // build the subtree GiSTpath path; path.MakeRoot (); MTnode *subtreeRoot = (MTnode *) subtree->ReadNode(path); if (subtreeRoot->IsUnderFull(*Store())) { // if the subtree root node is underfilled, we have to split the tree GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>; GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, subtree->TreeHeight()-1, parentEntries, name); // split the tree nSamples--; while (!newTreeNames->IsEmpty()) { // insert all the new trees in the subtrees list subtreeNames.Append (newTreeNames->RemoveFront()); MTentry *entry = parentEntries->RemoveFront(); for (int j=0; j<n; j++) { if (data[j]->object() == entry->object()) { // append the parent entry to the list topEntries.Append (data[j]); break; } } delete entry; nSamples++; } delete newTreeNames; delete parentEntries; minHeight = MIN (minHeight, subtree->TreeHeight()-1); } else { subtreeNames.Append (strdup(newName)); topEntries.Append (data[samples[i]]); minHeight = MIN (minHeight, subtree->TreeHeight()); } delete subtreeRoot; subtree->Close(); delete subtree->Store(); // it was created in subtree->Create() } delete []samples; for (int i=0; i<nInit; i++) { for (int j=0; j<ns[i]; j++) { delete array[i][j]; } delete []array[i]; } delete []array; delete []ns; // fix the subtree height GiSTlist<char *> subtreeNames2; // list of the subtrees names GiSTlist<MTentry *> topEntries2; // list of the parent entries of each subtree while (!topEntries.IsEmpty()) { // insert the trees in the list (splitting trees if necessary) MTentry *parentEntry = topEntries.RemoveFront (); char *tmp = subtreeNames.RemoveFront (); strcpy (newName, tmp); delete []tmp; subtree->Open(newName); if (subtree->TreeHeight() > minHeight) { // we have to split the tree to reduce its height nSamples--; GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>; 
GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, minHeight, parentEntries, name); // split the tree while (!newTreeNames->IsEmpty()) { // insert all the new trees in the subtrees list subtreeNames2.Append (newTreeNames->RemoveFront()); MTentry *entry = parentEntries->RemoveFront(); for (int j=0; j<n; j++) { if (data[j]->object() == entry->object()) { // append the parent entry to the parents list topEntries2.Append (data[j]); break;; } } delete entry; nSamples++; } delete newTreeNames; delete parentEntries; } else { // simply insert the tree and its parent entry to the lists subtreeNames2.Append (strdup(newName)); topEntries2.Append (parentEntry); } subtree->Close(); delete subtree->Store(); // it was created in tree->Open() } // build the super tree upon the parents MTentry **topEntrArr = new MTentry *[nSamples]; // array of the parent entries for each subtree char **subNameArr = new char *[nSamples]; // array of the subtrees names for (int i=0; i<nSamples; i++) { // convert the lists into arrays topEntrArr[i] = topEntries2.RemoveFront (); subNameArr[i] = subtreeNames2.RemoveFront (); } assert (topEntries2.IsEmpty()); assert (subtreeNames2.IsEmpty()); sprintf (newName, "%s.0", name); BulkLoad (topEntrArr, nSamples, padFactor, newName); // attach each subtree to the leaves of the super tree GiSTpath path; path.MakeRoot (); MTnode *node = (MTnode *) ReadNode (path); GiSTlist<MTnode *> *oldList = new GiSTlist<MTnode *>; // upper level nodes oldList->Append(node); int level = node->Level(); while (level > 0) { // build the leaves list for super tree GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; // lower level nodes while (!oldList->IsEmpty()) { node = oldList->RemoveFront(); path = node->Path(); node->SetLevel(node->Level() + minHeight); // update level of the upper nodes of the super tree WriteNode (node); for (int i=0; i<node->NumEntries(); i++) { MTentry *entry = (MTentry *) (*node)[i].Ptr(); path.MakeChild (entry->Ptr()); 
newList->Append((MTnode *)ReadNode(path)); path.MakeParent (); } delete node; } delete oldList; oldList = newList; level--; } while (!oldList->IsEmpty()) { // attach each subtree to its leaf node = oldList->RemoveFront(); // retrieve next leaf (root of subtree) node->SetLevel(minHeight); // update level of the root of the subtree path = node->Path(); for (int i=0; i<node->NumEntries(); i++) { MTentry *entry = (MTentry *) (*node)[i].Ptr(); path.MakeChild(Store()->Allocate()); MTnode *newNode = (MTnode *) CreateNode (); newNode->Path() = path; entry->SetPtr(path.Page()); path.MakeParent (); int j = 0; for (; entry->object() != topEntrArr[j]->object(); j++); // search the position to append subtree->Open(subNameArr[j]); GiSTpath rootPath; rootPath.MakeRoot (); Append (newNode, (MTnode *)subtree->ReadNode(rootPath)); // append this subtree to the super tree subtree->Close(); delete subtree->Store(); // it was created in tree->Open() delete newNode; } WriteNode (node); delete node; } subtree->Open(subNameArr[0]); // in order to destroy the object tree delete subtree; for (int i=0; i<nSamples; i++) { delete []subNameArr[i]; } delete []subNameArr; delete []topEntrArr; // update radii of the upper nodes of the result M-tree path.MakeRoot (); node = (MTnode *) ReadNode (path); oldList->Append(node); level = node->Level(); while (level >= minHeight) { // build the list of the nodes which radii should be recomputed GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; while (!oldList->IsEmpty()) { node = oldList->RemoveFront(); path = node->Path(); for (int i=0; i<node->NumEntries(); i++) { path.MakeChild ((*node)[i].Ptr()->Ptr()); newList->Append((MTnode *)ReadNode(path)); path.MakeParent (); } delete node; } delete oldList; oldList = newList; level--; } while (!oldList->IsEmpty()) { // adjust the radii of the nodes MTnode *node = oldList->RemoveFront(); AdjKeys (node); delete node; } delete oldList; for (int i=0; i<=nCreated; i++) { // delete all temporary subtrees sprintf 
(newName, "%s.%i", name, i); unlink (newName); } } else { // we can insert all the entries in a single node GiSTpath path; path.MakeRoot (); GiSTnode *node = ReadNode (path); for (int i=0; i<n; i++) { node->Insert(*(data[i])); } assert (!node->IsOverFull(*Store())); WriteNode (node); delete node; } }