int * MTnode::PickCandidates() { int max_ind=MIN(NUM_CANDIDATES, NumEntries()), *vec=new int[max_ind], i; BOOL *used=new BOOL[NumEntries()]; for(i=0; i<NumEntries(); i++) used[i]=(((MTentry *)(*this)[i].Ptr())->Key()->distance==0); // insert in vec the indices of the candidates for promotion for(i=0; i<max_ind; i++) { int j; do j=PickRandom(0, NumEntries()); while(used[j]); vec[i]=j; used[j]=TRUE; } return vec; }
int * MTnode::PickCandidates () { int n = NumEntries (); BOOL *bUsed = new BOOL[n]; for (int i=0; i<n; i++) { bUsed[i] = ((MTentry *)(*this)[i].Ptr())->Key()->distance == 0; // exclude parent entry } int count = MIN (NUM_CANDIDATES, n-1), *results = new int[count]; // insert in results the indices of the candidates for promotion for (int i=0; i<count; i++) { int j; do { j = PickRandom (0, n); } while (bUsed[j]); results[i] = j; bUsed[j] = TRUE; } delete []bUsed; return results; }
// Elect the routing object of the new node created by a split.
//
// The node is duplicated through NCopy() and the duplicate's obj is pointed at
// the object of the entry elected by the policy in PROMOTE_VOTE_FUNCTION:
//   RANDOMV     - a random entry whose stored distance is not 0;
//   SAMPLINGV   - among the indices returned by PickCandidates(), the one whose
//                 trial split gives the smallest larger radius (ties are broken
//                 on the smaller of the two radii); the duplicate's stored
//                 distances are rewritten relative to the winner;
//   MAX_LB_DIST - the entry with the largest stored distance;
//   mM_RAD      - the entry with the smallest radius estimated from the stored
//                 distances only (no distance computations).
// Returns the duplicate; nothing here frees it, so the caller takes it over.
MTnode *
MTnode::PromoteVote()
{
	MTnode *newnode=(MTnode *)NCopy();
	int i;

	switch(PROMOTE_VOTE_FUNCTION) {
		case RANDOMV: {	// complexity: constant
			// pick a random entry (different from the parent)
			do i=PickRandom(0, NumEntries());
			while(((MTentry *)(*this)[i].Ptr())->Key()->distance==0);
			newnode->obj=&((MTentry *)((*newnode)[i].Ptr()))->object();
			break;
		}
		case SAMPLINGV: {	// complexity: O(kn) distance computations
			// bestcand starts at a valid row so it is never read indeterminate
			// when no trial split improves on MAXDOUBLE
			int *vec=PickCandidates(), bestcand=0;
			double minvalue=MAXDOUBLE, sec_minvalue=MAXDOUBLE, **distances=new double *[MIN(NUM_CANDIDATES, NumEntries())];	// distance matrix

			// find the candidate with minimum radius
			for(i=0; i<MIN(NUM_CANDIDATES, NumEntries()); i++) {
				MTentry *cand=(MTentry *)((*this)[vec[i]].Ptr()), *e1=new MTentry, *e2=new MTentry;
				MTnode *node1=(MTnode *)Copy(), *node2=(MTnode *)NCopy();
				double value, sec_value;
				int leftdeletes, rightdeletes, *leftvec=new int[NumEntries()], *rightvec=new int[NumEntries()], j;

				// initialize distance matrix
				distances[i]=new double[NumEntries()];
				for(j=0; j<NumEntries(); j++) distances[i][j]=((vec[i]==j)? 0: cand->object().distance(((MTentry *)((*this)[j].Ptr()))->object()));
				// node2 plays the new node with the candidate as its parent
				for(j=0; j<NumEntries(); j++) ((MTentry *)((*node2)[j].Ptr()))->Key()->distance=distances[i][j];
				node1->obj=obj;
				node2->obj=&((MTentry *)((*this)[vec[i]].Ptr()))->object();
				// perform the split
				node1->Split(node2, leftvec, rightvec, &leftdeletes, &rightdeletes);
				// given the deletion vectors, do bulk deletes
				node1->DeleteBulk(leftvec, leftdeletes);
				node2->DeleteBulk(rightvec, rightdeletes);
				e1->InitKey();
				e2->InitKey();
				e1->setobject(*node1->obj);
				e2->setobject(*node2->obj);
				e1->setmaxradius(0);
				e2->setmaxradius(0);
				e1->setminradius(MAXDOUBLE);
				e2->setminradius(MAXDOUBLE);
				// compute the radii
				node1->mMRadius(e1);
				node2->mMRadius(e2);
				// check the result
				value=MAX(e1->maxradius(), e2->maxradius());	// this is minMAX_RADII
				sec_value=MIN(e1->maxradius(), e2->maxradius());
				if((value<minvalue)||((value==minvalue)&&(sec_value<sec_minvalue))) {
					minvalue=value;
					sec_minvalue=sec_value;
					bestcand=i;
				}
				// be tidy
				delete e1;
				delete e2;
				delete node1;
				delete node2;
				delete []leftvec;
				delete []rightvec;
			}
			newnode->obj=&((MTentry *)((*newnode)[vec[bestcand]].Ptr()))->object();
			// update the distance of the children from the new parent
			for(i=0; i<NumEntries(); i++) ((MTentry *)((*newnode)[i].Ptr()))->Key()->distance=distances[bestcand][i];
			for(i=0; i<MIN(NUM_CANDIDATES, NumEntries()); i++) delete []distances[i];
			delete []distances;
			delete []vec;
			break;
		}
		case MAX_LB_DIST: {	// complexity: constant
			double maxdist=-1;
			int maxcand=0;	// defined even when the search below finds nothing

			if(Tree()->IsOrdered()) maxcand=NumEntries()-1;	// if the tree is ordered we can choose the last element
			else	// otherwise we have to search the object which is farthest from the parent
				for(i=0; i<NumEntries(); i++) {
					MTentry *e=(MTentry *)((*this)[i].Ptr());

					if(e->Key()->distance>maxdist) {
						maxdist=e->Key()->distance;
						maxcand=i;
					}
				}
			newnode->obj=&((MTentry *)((*newnode)[maxcand].Ptr()))->object();
			break;
		}
		case mM_RAD: {	// complexity: constant
			double minradius=MAXDOUBLE;
			int bestcand=0;	// defined even when every entry is skipped or no radius beats MAXDOUBLE

			for(i=0; i<NumEntries(); i++) {
				MTentry *cand=(MTentry *)((*this)[i].Ptr());
				double radius=0;

				if(cand->Key()->distance==0) continue;	// the parent entry is not a candidate
				for(int j=0; j<NumEntries(); j++) {
					MTentry *e=(MTentry *)((*this)[j].Ptr());
					double dmin, dmax;

					if(i==j) continue;
					// bounds on d(cand, e) derived from the two stored distances
					dmin=fabs(cand->Key()->distance-e->Key()->distance);
					dmax=cand->Key()->distance+e->Key()->distance;
					switch(RADIUS_FUNCTION) {
						case LB:
							radius=MAX(radius, dmin);
							break;
						case AVG:
							radius=MAX(radius, (dmin+dmax)/2);
							break;
						case UB:
							radius=MAX(radius, dmax);
							break;
					}
				}
				if(radius<minradius) {
					bestcand=i;
					minradius=radius;
				}
			}
			newnode->obj=&((MTentry *)((*newnode)[bestcand].Ptr()))->object();
			break;
		}
	}
	return newnode;
}
// Choose the two routing objects of a split.
//
// On return this node's obj points at the object promoted for the left node,
// and the returned node (a duplicate of this one obtained from NCopy() or
// Copy()) has its obj pointing at the object promoted for the right node.
// The policy is taken from PROMOTE_PART_FUNCTION:
//   RANDOM               - two different random entries;
//   CONFIRMED            - keep the current parent object (the entry at stored
//                          distance 0) and let PromoteVote() elect the other;
//   MAX_UB_DIST          - the two entries with the largest stored distances;
//   SAMPLING             - best pair among the indices from PickCandidates();
//   MIN_RAD/MIN_OVERLAPS - best pair among all entries.
// CONFIRMED and MAX_UB_DIST rely on stored distances; when the first entry
// carries -maxDist() (the root case, per the comments below) they switch to
// SECONDARY_PART_FUNCTION for the duration of one recursive call.
// Nothing here frees the returned node, so the caller takes it over.
MTnode *
MTnode::PromotePart()
{
	MTnode *newnode;

	switch(PROMOTE_PART_FUNCTION) {
		case RANDOM: {	// complexity: constant
			int i, j;

			// pick two *different* random entries
			i=PickRandom(0, NumEntries());
			do j=PickRandom(0, NumEntries());
			while(j==i);
			if(((MTentry *)(*this)[j].Ptr())->Key()->distance==0) {
				int k=i;

				i=j;
				j=k;	// if we chose the parent entry, put it in the left node
			}
			newnode=(MTnode *)NCopy();
			// re-assign the nodes' object
			newnode->obj=&((MTentry *)((*newnode)[j].Ptr()))->object();
			obj=&((MTentry *)((*this)[i].Ptr()))->object();
			if(((MTentry *)(*this)[i].Ptr())->Key()->distance>0) {	// if the parent object wasn't confirmed, invalidate also the parent
				InvalidateEntry(TRUE);
				InvalidateEntries();
			}
			else InvalidateEntry(FALSE);	// else, invalidate only the node's radii
			break;
		}
		case CONFIRMED: {	// complexity: determined by the confirmed promotion algorithm
			int i;
			BOOL isRoot=(((MTentry *)((*this)[0].Ptr()))->Key()->distance==-maxDist());	// we have ordered entries

			if(isRoot) {	// if we're splitting the root we have to use a policy that doesn't use stored distances
				PROMOTE_PART_FUNCTION=SECONDARY_PART_FUNCTION;
				newnode=PromotePart();
				PROMOTE_PART_FUNCTION=CONFIRMED;
			}
			else {
				int index=-1;

				// locate the parent entry (stored distance 0); the code assumes one exists
				for(i=0; (i<NumEntries())&&(index<0); i++)
					if(((MTentry *)((*this)[i].Ptr()))->Key()->distance==0) index=i;
				obj=&((MTentry *)((*this)[index].Ptr()))->object();
				// now choose the right node parent
				newnode=PromoteVote();
			}
			InvalidateEntry(FALSE);
			break;
		}
		case MAX_UB_DIST: {	// complexity: constant
			// everything is initialised so the search below never reads an
			// indeterminate value
			double maxdist=-1, maxdist2=-1;
			int i, maxcand1=0, maxcand2=0;
			BOOL isRoot=(((MTentry *)((*this)[0].Ptr()))->Key()->distance==-maxDist());	// we have ordered entries

			if(isRoot) {	// if we're splitting the root we have to use a policy that doesn't use stored distances
				PROMOTE_PART_FUNCTION=SECONDARY_PART_FUNCTION;
				newnode=PromotePart();
				PROMOTE_PART_FUNCTION=MAX_UB_DIST;	// restore *this* policy (it used to be reset to CONFIRMED)
				// the recursive call already picked both objects: stop here
				// instead of overwriting (and leaking) its result
				break;
			}
			if(Tree()->IsOrdered()) {	// if the tree is ordered we can choose the last two elements
				maxcand1=NumEntries()-1;
				maxcand2=NumEntries()-2;
			}
			else	// otherwise we have to search the two objects which are farthest from the parent
				for(i=0; i<NumEntries(); i++) {
					MTentry *e=(MTentry *)((*this)[i].Ptr());

					if(e->Key()->distance>maxdist) {
						maxdist2=maxdist;
						maxdist=e->Key()->distance;
						maxcand2=maxcand1;
						maxcand1=i;
					}
					else if(e->Key()->distance>maxdist2) {
						maxdist2=e->Key()->distance;
						maxcand2=i;
					}
				}
			// for sure the parent isn't confirmed (unless we have a binary tree...)
			obj=&((MTentry *)((*this)[maxcand1].Ptr()))->object();
			InvalidateEntry(TRUE);
			InvalidateEntries();
			newnode=(MTnode *)NCopy();
			newnode->obj=&((MTentry *)((*newnode)[maxcand2].Ptr()))->object();
			break;
		}
		case SAMPLING: {	// complexity: O(kn) distance computations
			// min1/min2 start at a valid row so they are defined even when
			// fewer than two candidates exist and no pair is evaluated
			int *vec=PickCandidates(), i, j, min1=0, min2=0;
			double minvalue=MAXDOUBLE, sec_minvalue=MAXDOUBLE, **distances=new double *[MIN(NUM_CANDIDATES, NumEntries())];	// distance matrix

			// initialize distance matrix; -maxDist() marks "not computed yet"
			for(i=0; i<MIN(NUM_CANDIDATES, NumEntries()); i++) {
				distances[i]=new double[NumEntries()];
				for(j=0; j<NumEntries(); j++) distances[i][j]=-maxDist();
			}
			// a candidate at stored distance 0 can reuse the stored distances
			for(i=0; i<MIN(NUM_CANDIDATES, NumEntries()); i++)
				if(((MTentry *)((*this)[vec[i]].Ptr()))->Key()->distance==0) {
					for(j=0; j<NumEntries(); j++) distances[i][j]=((MTentry *)((*this)[j].Ptr()))->Key()->distance;
					break;
				}
			for(i=0; i<MIN(NUM_CANDIDATES, NumEntries()); i++) distances[i][vec[i]]=0;
			// find the candidates with minimum radius
			for(i=1; i<MIN(NUM_CANDIDATES, NumEntries()); i++)
				for(j=0; j<i; j++) {
					MTentry *e1=new MTentry, *e2=new MTentry;
					MTnode *node1=(MTnode *)NCopy(), *node2=(MTnode *)NCopy();
					double value, sec_value;
					int leftdeletes, rightdeletes, *leftvec=new int[NumEntries()], *rightvec=new int[NumEntries()], k;

					for(k=0; k<NumEntries(); k++) {
						((MTentry *)((*node1)[k].Ptr()))->Key()->distance=distances[i][k];
						((MTentry *)((*node2)[k].Ptr()))->Key()->distance=distances[j][k];
					}
					node1->obj=&((MTentry *)((*this)[vec[i]].Ptr()))->object();
					node2->obj=&((MTentry *)((*this)[vec[j]].Ptr()))->object();
					// perform the split
					node1->Split(node2, leftvec, rightvec, &leftdeletes, &rightdeletes);
					// read back the distances held by the trial nodes after the split
					for(k=0; k<NumEntries(); k++) {
						distances[i][k]=((MTentry *)((*node1)[k].Ptr()))->Key()->distance;
						distances[j][k]=((MTentry *)((*node2)[k].Ptr()))->Key()->distance;
					}
					// given the deletion vectors, do bulk deletes
					node1->DeleteBulk(leftvec, leftdeletes);
					node2->DeleteBulk(rightvec, rightdeletes);
					e1->InitKey();
					e2->InitKey();
					e1->setobject(*node1->obj);
					e2->setobject(*node2->obj);
					e1->setmaxradius(0);
					e2->setmaxradius(0);
					e1->setminradius(MAXDOUBLE);
					e2->setminradius(MAXDOUBLE);
					// compute the radii
					node1->mMRadius(e1);
					node2->mMRadius(e2);
					// check the result
					value=MAX(e1->maxradius(), e2->maxradius());	// this is minMAX_RADII
					sec_value=MIN(e1->maxradius(), e2->maxradius());
					if((value<minvalue)||((value==minvalue)&&(sec_value<sec_minvalue))) {
						minvalue=value;
						sec_minvalue=sec_value;
						min1=i;
						min2=j;
					}
					// be tidy
					delete []leftvec;
					delete []rightvec;
					delete node1;
					delete node2;
					delete e1;
					delete e2;
				}
			if(((MTentry *)(*this)[vec[min2]].Ptr())->Key()->distance>0) newnode=(MTnode *)NCopy();
			else newnode=(MTnode *)Copy();
			newnode->obj=&((MTentry *)((*newnode)[vec[min2]].Ptr()))->object();
			obj=&((MTentry *)((*this)[vec[min1]].Ptr()))->object();
			if(((MTentry *)(*this)[vec[min1]].Ptr())->Key()->distance>0) {	// if the parent object wasn't confirmed, invalidate also the parent
				InvalidateEntry(TRUE);
				InvalidateEntries();
			}
			else InvalidateEntry(FALSE);	// else, invalidate only the node's radii
			for(i=0; i<NumEntries(); i++) {
				((MTentry *)((*this)[i].Ptr()))->Key()->distance=distances[min1][i];
				((MTentry *)((*newnode)[i].Ptr()))->Key()->distance=distances[min2][i];
			}
			for(i=0; i<MIN(NUM_CANDIDATES, NumEntries()); i++) delete []distances[i];
			delete []distances;
			delete []vec;	// candidate list from PickCandidates(): previously leaked
			break;
		}
		case MIN_RAD:
		case MIN_OVERLAPS: {	// complexity: O(n^2) distance computations
			// min1/min2 start at a valid row so they are defined even when
			// no pair is evaluated
			int min1=0, min2=0, i, j;
			double minvalue=MAXDOUBLE, sec_minvalue=MAXDOUBLE, **distances=new double *[NumEntries()];	// distance matrix

			// initialize distance matrix; -maxDist() marks "not computed yet"
			for(i=0; i<NumEntries(); i++) {
				distances[i]=new double[NumEntries()];
				for(j=0; j<NumEntries(); j++) distances[i][j]=-maxDist();
			}
			// the entry at stored distance 0 can reuse the stored distances
			for(i=0; i<NumEntries(); i++)
				if(((MTentry *)((*this)[i].Ptr()))->Key()->distance==0) {
					for(j=0; j<NumEntries(); j++) {
						distances[i][j]=((MTentry *)((*this)[j].Ptr()))->Key()->distance;
						distances[j][i]=distances[i][j];
					}
					break;
				}
			for(i=0; i<NumEntries(); i++) distances[i][i]=0;
			for(i=1; i<NumEntries(); i++)
				for(j=0; j<i; j++) {
					MTentry *e1=new MTentry, *e2=new MTentry;
					MTnode *node1=(MTnode *)NCopy(), *node2=(MTnode *)NCopy();
					// sec_value is only computed for MIN_RAD; give it a value so
					// the tie-break below never reads it indeterminate
					double value, sec_value=0;
					int leftdeletes, rightdeletes, *leftvec=new int[NumEntries()], *rightvec=new int[NumEntries()], k;

					for(k=0; k<NumEntries(); k++) {
						((MTentry *)((*node1)[k].Ptr()))->Key()->distance=distances[i][k];
						((MTentry *)((*node2)[k].Ptr()))->Key()->distance=distances[j][k];
					}
					node1->obj=&((MTentry *)((*this)[i].Ptr()))->object();
					node2->obj=&((MTentry *)((*this)[j].Ptr()))->object();
					// perform the split
					node1->Split(node2, leftvec, rightvec, &leftdeletes, &rightdeletes);
					// read back the distances held by the trial nodes, keeping the matrix symmetric
					for(k=0; k<NumEntries(); k++) {
						distances[i][k]=((MTentry *)((*node1)[k].Ptr()))->Key()->distance;
						distances[j][k]=((MTentry *)((*node2)[k].Ptr()))->Key()->distance;
						distances[k][i]=distances[i][k];
						distances[k][j]=distances[j][k];
					}
					// given the deletion vectors, do bulk deletes
					node1->DeleteBulk(leftvec, leftdeletes);
					node2->DeleteBulk(rightvec, rightdeletes);
					e1->InitKey();
					e2->InitKey();
					e1->setobject(*node1->obj);
					e2->setobject(*node2->obj);
					e1->setmaxradius(0);
					e2->setmaxradius(0);
					e1->setminradius(MAXDOUBLE);
					e2->setminradius(MAXDOUBLE);
					// compute the radii
					node1->mMRadius(e1);
					node2->mMRadius(e2);
					// check the result
					if(PROMOTE_PART_FUNCTION==MIN_RAD) {
						value=MAX(e1->maxradius(), e2->maxradius());	// this is minMAX_RADII
						sec_value=MIN(e1->maxradius(), e2->maxradius());
					}
					else value=e1->maxradius()+e2->maxradius()-distances[i][j];
					if((value<minvalue)||((value==minvalue)&&(sec_value<sec_minvalue))) {
						minvalue=value;
						sec_minvalue=sec_value;
						min1=i;
						min2=j;
					}
					// be tidy
					delete []leftvec;
					delete []rightvec;
					delete node1;
					delete node2;
					delete e1;
					delete e2;
				}
			if(((MTentry *)(*this)[min2].Ptr())->Key()->distance>0) newnode=(MTnode *)NCopy();
			else newnode=(MTnode *)Copy();
			newnode->obj=&((MTentry *)((*newnode)[min2].Ptr()))->object();
			obj=&((MTentry *)((*this)[min1].Ptr()))->object();
			if(((MTentry *)(*this)[min1].Ptr())->Key()->distance>0) {	// if the parent object wasn't confirmed, invalidate also the parent
				InvalidateEntry(TRUE);
				InvalidateEntries();
			}
			else InvalidateEntry(FALSE);	// else, invalidate only the node's radii
			for(i=0; i<NumEntries(); i++) {
				((MTentry *)((*this)[i].Ptr()))->Key()->distance=distances[min1][i];
				((MTentry *)((*newnode)[i].Ptr()))->Key()->distance=distances[min2][i];
			}
			for(i=0; i<NumEntries(); i++) delete []distances[i];
			delete []distances;
			break;
		}
	}
	return newnode;
}
// Elect the routing object of the new node created by a split.
//
// A duplicate of this node is obtained from NCopy() and its obj is pointed at
// the object of the entry elected by the policy in PROMOTE_VOTE_FUNCTION:
//   RANDOMV      - a random entry whose stored distance is not 0;
//   SAMPLINGV    - among the indices from PickCandidates(), the one whose trial
//                  split gives the smallest larger radius (ties broken on the
//                  smaller radius);
//   MAX_LB_DISTV - the entry with the largest stored distance;
//   mM_RADV      - among all entries except the parent entry, the one whose
//                  trial split gives the smallest larger radius.
// For SAMPLINGV and mM_RADV the duplicate's stored distances are rewritten
// relative to the winner. The duplicate is returned and is not freed here.
MTnode *
MTnode::PromoteVote ()
{
	MTnode *result = (MTnode *) NCopy ();

	switch (PROMOTE_VOTE_FUNCTION) {
		case RANDOMV: {	// complexity: constant
			// keep drawing until the entry is not the parent one
			int pick;
			for (;;) {
				pick = PickRandom (0, NumEntries());
				if (((MTentry *)(*this)[pick].Ptr())->Key()->distance != 0) {
					break;
				}
			}
			result->obj = &((MTentry *)((*result)[pick].Ptr()))->object();
			break;
		}
		case SAMPLINGV: {	// complexity: O(kn) distance computations
			int total = NumEntries ();
			int nCand = MIN (NUM_CANDIDATES, total-1);
			int *cands = PickCandidates ();
			int winner = 0;
			double best = MAXDOUBLE;
			double bestSecond = MAXDOUBLE;
			double **dist = new double *[nCand];	// one row of distances per candidate

			// try every candidate as parent of the new node
			for (int c=0; c<nCand; c++) {
				MTentry *candEntry = (MTentry *) ((*this)[cands[c]].Ptr());
				MTnode *left = (MTnode *) Copy ();
				MTnode *right = (MTnode *) NCopy ();

				dist[c] = new double[total];
				for (int e=0; e<total; e++) {
					if (cands[c] == e) {
						dist[c][e] = 0;
					} else {
						dist[c][e] = candEntry->object().distance(((MTentry *)((*this)[e].Ptr()))->object());
					}
					// distance of entry e from the candidate, as seen by the right node
					((MTentry *)((*right)[e].Ptr()))->Key()->distance = dist[c][e];
				}
				left->obj = obj;
				right->obj = &((MTentry *)((*this)[cands[c]].Ptr()))->object();

				// trial split, then drop the entries that moved away
				int *leftDrop = new int[total];
				int *rightDrop = new int[total];
				int nLeftDrop, nRightDrop;
				left->Split(right, leftDrop, rightDrop, &nLeftDrop, &nRightDrop);
				left->DeleteBulk(leftDrop, nLeftDrop);
				right->DeleteBulk(rightDrop, nRightDrop);

				MTentry *leftEntry = new MTentry;
				MTentry *rightEntry = new MTentry;
				leftEntry->InitKey();
				rightEntry->InitKey();
				leftEntry->SetObject(*left->obj);
				rightEntry->SetObject(*right->obj);
				leftEntry->SetMaxRadius(0);
				rightEntry->SetMaxRadius(0);
				leftEntry->SetMinRadius(MAXDOUBLE);
				rightEntry->SetMinRadius(MAXDOUBLE);
				// compute the radii
				left->mMRadius(leftEntry);
				right->mMRadius(rightEntry);

				// compare with the best split seen so far
				double larger = MAX (leftEntry->MaxRadius(), rightEntry->MaxRadius());
				double smaller = MIN (leftEntry->MaxRadius(), rightEntry->MaxRadius());
				bool improves = larger < best;
				if (!improves && larger == best) {
					improves = smaller < bestSecond;
				}
				if (improves) {
					best = larger;
					bestSecond = smaller;
					winner = c;
				}

				// be tidy
				delete leftEntry;
				delete rightEntry;
				delete []leftDrop;
				delete []rightDrop;
				delete left;
				delete right;
			}

			result->obj = &((MTentry *)((*result)[cands[winner]].Ptr()))->object();
			// update the distance of the children from the new parent
			for (int e=0; e<total; e++) {
				((MTentry *)((*result)[e].Ptr()))->Key()->distance = dist[winner][e];
			}
			for (int c=0; c<nCand; c++) {
				delete []dist[c];
			}
			delete []dist;
			delete []cands;
			break;
		}
		case MAX_LB_DISTV: {	// complexity: constant
			int farthest = 0;
			if (!Tree()->IsOrdered()) {
				// unordered: scan for the entry farthest from the parent
				double farthestDist = -1;
				for (int e=0; e<NumEntries(); e++) {
					MTentry *cur = (MTentry *) (*this)[e].Ptr();
					if (cur->Key()->distance > farthestDist) {
						farthestDist = cur->Key()->distance;
						farthest = e;
					}
				}
			} else {
				// ordered: the last element is the one we want
				farthest = NumEntries() - 1;
			}
			result->obj = &((MTentry *)(*result)[farthest].Ptr())->object();
			break;
		}
		case mM_RADV: {	// original note: "complexity: O(n) distance computations"
			int total = NumEntries ();
			double **dist = new double *[total];	// symmetric distance matrix
			for (int r=0; r<total; r++) {
				dist[r] = new double[total];
			}
			// -MaxDist() marks a distance that has not been computed yet
			for (int r=0; r<total; r++) {
				for (int c=0; c<total; c++) {
					dist[r][c] = -MaxDist();
				}
			}

			// look for the candidate minimizing max(1->maxradius, 2->maxradius)
			double best = MAXDOUBLE;
			int winner = 0;
			for (int c=0; c<total; c++) {
				MTentry *candEntry = (MTentry *) (*this)[c].Ptr();

				if (candEntry->Key()->distance == 0) {
					// parent entry: not a candidate, but its stored distances are reusable
					for (int e=0; e<total; e++) {
						double stored = ((MTentry *)(*this)[e].Ptr())->Key()->distance;
						dist[c][e] = stored;
						dist[e][c] = stored;
					}
					continue;
				}

				MTnode *left = (MTnode *) Copy ();
				MTnode *right = (MTnode *) NCopy ();
				for (int e=0; e<total; e++) {
					double d;
					if (c == e) {
						d = 0;
					} else if (dist[c][e] == -MaxDist()) {
						d = candEntry->object().distance(((MTentry *)(*this)[e].Ptr())->object());
					} else {
						d = dist[c][e];
					}
					dist[c][e] = d;
					dist[e][c] = d;
					// distance of entry e from the candidate, as seen by the right node
					((MTentry *)(*right)[e].Ptr())->Key()->distance = dist[c][e];
				}
				left->obj = obj;
				right->obj = &((MTentry *)(*this)[c].Ptr())->object();

				// trial split, then drop the entries that moved away
				int *leftDrop = new int[total];
				int *rightDrop = new int[total];
				int nLeftDrop, nRightDrop;
				left->Split(right, leftDrop, rightDrop, &nLeftDrop, &nRightDrop);
				left->DeleteBulk(leftDrop, nLeftDrop);
				right->DeleteBulk(rightDrop, nRightDrop);

				MTentry *leftEntry = new MTentry;
				MTentry *rightEntry = new MTentry;
				leftEntry->InitKey();
				rightEntry->InitKey();
				leftEntry->SetObject(*left->obj);
				rightEntry->SetObject(*right->obj);
				leftEntry->SetMaxRadius(0);
				rightEntry->SetMaxRadius(0);
				leftEntry->SetMinRadius(MAXDOUBLE);
				rightEntry->SetMinRadius(MAXDOUBLE);
				left->mMRadius(leftEntry);
				right->mMRadius(rightEntry);

				// compare with the best split seen so far
				double larger = MAX (leftEntry->MaxRadius(), rightEntry->MaxRadius());
				if (larger < best) {
					best = larger;
					winner = c;
				}

				// be tidy
				delete leftEntry;
				delete rightEntry;
				delete []leftDrop;
				delete []rightDrop;
				delete left;
				delete right;
			}

			result->obj = &((MTentry *)(*result)[winner].Ptr())->object();
			// update the distance of the children from the new parent
			for (int e=0; e<total; e++) {
				((MTentry *)(*result)[e].Ptr())->Key()->distance = dist[winner][e];
			}
			for (int r=0; r<total; r++) {
				delete []dist[r];
			}
			delete []dist;
			break;
		}
	}
	return result;
}
// Choose the two routing objects of a split.
//
// On return this node's obj points at the object promoted for the left node,
// and the returned node (a duplicate of this one obtained from NCopy() or
// Copy()) has its obj pointing at the object promoted for the right node.
// The policy is taken from PROMOTE_PART_FUNCTION:
//   RANDOM    - two different random entries;
//   CONFIRMED - keep the current parent object (the entry at stored distance 0)
//               and let PromoteVote() elect the other; when the first entry
//               carries -MaxDist() (the root case, per the comment below)
//               SECONDARY_PART_FUNCTION is used for one recursive call instead;
//   SAMPLING  - best pair among the indices returned by PickCandidates();
//   mM_RAD    - best pair among all entries.
// "Best" means the smallest larger radius after a trial split, ties broken on
// the smaller radius. Returns NULL if the policy matches no case. Nothing here
// frees the returned node, so the caller takes it over.
MTnode *
MTnode::PromotePart ()
{
	MTnode *newNode = NULL;

	switch (PROMOTE_PART_FUNCTION) {
		case RANDOM: {	// pick two *different* random entries, complexity: constant
			int i = PickRandom (0, NumEntries());
			int j;
			do {
				j = PickRandom (0, NumEntries());
			} while (i == j);
			if (((MTentry *)(*this)[j].Ptr())->Key()->distance == 0) {	// if we chose the parent entry, put it in the left node
				int temp = i;
				i = j;
				j = temp;
			}

			// re-assign the nodes' object
			newNode = (MTnode *) NCopy ();
			newNode->obj = &((MTentry *)((*newNode)[j].Ptr()))->object();
			obj = &((MTentry *)((*this)[i].Ptr()))->object();
			if (((MTentry *)(*this)[i].Ptr())->Key()->distance > 0) {	// unconfirmed, invalidate also the parent
				InvalidateEntry (TRUE);
				InvalidateEntries ();
			} else {
				InvalidateEntry (FALSE);	// confirmed, invalidate only the node's radii
			}
			break;
		}
		case CONFIRMED: {	// complexity: determined by the confirmed promotion algorithm
			if (((MTentry *)((*this)[0].Ptr()))->Key()->distance == -MaxDist()) {
				// if we're splitting the root we have to use a policy that doesn't use stored distances
				PROMOTE_PART_FUNCTION = SECONDARY_PART_FUNCTION;
				newNode = PromotePart ();
				PROMOTE_PART_FUNCTION = CONFIRMED;
			} else {
				// locate the parent entry (stored distance 0); the last match wins
				// and the code assumes that at least one exists
				int index = -1;
				for (int i=0; i<NumEntries() /*&& index<0*/; i++) {
					if (((MTentry *)((*this)[i].Ptr()))->Key()->distance == 0) {	// parent obj
						index = i;
					}
				}
				obj = &((MTentry *)((*this)[index].Ptr()))->object();
				newNode = PromoteVote ();	// now choose the right node parent
			}
			InvalidateEntry (FALSE);
			break;
		}
		case SAMPLING: {	// complexity: O(kn) distance computations
			int n = NumEntries (), count = MIN (NUM_CANDIDATES, n-1);
			double **distances = new double*[count];	// distance matrix
			int *vec = PickCandidates ();

			// initialize distance matrix; -MaxDist() marks "not computed yet"
			for (int i=0; i<count; i++) {
				distances[i] = new double[n];
				for (int j=0; j<n; j++) {
					distances[i][j] = (j == vec[i]) ? 0 : -MaxDist();
				}
			}

			// find the candidates with minimum radius;
			// min1/min2 start at a valid row so they are defined even when
			// fewer than two candidates exist and no pair is evaluated
			int min1 = 0, min2 = 0;
			double min = MAXDOUBLE, secMin = MAXDOUBLE;
			for (int i=1; i<count; i++) {
				for (int j=0; j<i; j++) {
					MTnode *node1 = (MTnode *) NCopy (), *node2 = (MTnode *) NCopy ();
					for (int k=0; k<n; k++) {
						((MTentry *)(*node1)[k].Ptr())->Key()->distance = distances[i][k];
						((MTentry *)(*node2)[k].Ptr())->Key()->distance = distances[j][k];
					}
					node1->obj = &((MTentry *)(*this)[vec[i]].Ptr())->object();
					node2->obj = &((MTentry *)(*this)[vec[j]].Ptr())->object();

					// perform the split
					int lDel, rDel, *lVec = new int[n], *rVec = new int[n];
					node1->Split(node2, lVec, rVec, &lDel, &rDel);
					// read back the distances held by the trial nodes after the split
					for (int k=0; k<n; k++) {
						distances[i][k] = ((MTentry *)(*node1)[k].Ptr())->Key()->distance;
						distances[j][k] = ((MTentry *)(*node2)[k].Ptr())->Key()->distance;
					}
					node1->DeleteBulk(lVec, lDel);
					node2->DeleteBulk(rVec, rDel);

					MTentry *entry1 = new MTentry, *entry2 = new MTentry;
					entry1->InitKey();
					entry2->InitKey();
					entry1->SetObject(*node1->obj);
					entry2->SetObject(*node2->obj);
					entry1->SetMaxRadius(0);
					entry2->SetMaxRadius(0);
					entry1->SetMinRadius(MAXDOUBLE);
					entry2->SetMinRadius(MAXDOUBLE);
					node1->mMRadius(entry1);
					node2->mMRadius(entry2);

					// check the result
					double val1 = MAX (entry1->MaxRadius(), entry2->MaxRadius()), val2 = MIN (entry1->MaxRadius(), entry2->MaxRadius());
					if (val1<min || (val1==min && val2<secMin)) {
						min = val1;
						secMin = val2;
						min1 = i;
						min2 = j;
					}

					// be tidy
					delete entry1;
					delete entry2;
					delete []lVec;
					delete []rVec;
					delete node1;
					delete node2;
				}
			}

			newNode = (MTnode *) NCopy ();
			obj = &((MTentry *)(*this)[vec[min1]].Ptr())->object();
			newNode->obj = &((MTentry *)(*newNode)[vec[min2]].Ptr())->object();
			// the parent object wasn't confirmed, invalidate also the parent
			InvalidateEntry (TRUE);
			InvalidateEntries ();
			for (int i=0; i<n; i++) {
				((MTentry *)(*this)[i].Ptr())->Key()->distance = distances[min1][i];
				((MTentry *)(*newNode)[i].Ptr())->Key()->distance = distances[min2][i];
			}

			for (int i=0; i<count; i++) {
				delete []distances[i];
			}
			delete []distances;
			delete []vec;	// candidate list from PickCandidates(): previously leaked
			break;
		}
		case mM_RAD: {	// complexity: O(n^2) distance computations
			int n = NumEntries ();
			double **distances = new double *[n];	// distance matrix

			// initialize distance matrix; -MaxDist() marks "not computed yet"
			for (int i=0; i<n; i++) {
				distances[i] = new double[n];
				for (int j=0; j<n; j++) {
					distances[i][j] = (j == i) ? 0 : -MaxDist();
				}
			}
			// the entry at stored distance 0 can reuse the stored distances
			for (int i=0; i<n; i++) {
				if (((MTentry *)(*this)[i].Ptr())->Key()->distance == 0) {
					for (int j=0; j<n; j++) {
						distances[j][i] = distances[i][j] = ((MTentry *)(*this)[j].Ptr())->Key()->distance;
					}
					break;
				}
			}

			// min1/min2 start at a valid row so they are defined even when
			// the node holds fewer than two entries and no pair is evaluated
			int min1 = 0, min2 = 0;
			double min = MAXDOUBLE, secMin = MAXDOUBLE;
			for (int i=1; i<n; i++) {
				for (int j=0; j<i; j++) {
					MTnode *node1 = (MTnode *) NCopy (), *node2 = (MTnode *) NCopy ();
					for (int k=0; k<n; k++) {
						((MTentry *)(*node1)[k].Ptr())->Key()->distance = distances[i][k];
						((MTentry *)(*node2)[k].Ptr())->Key()->distance = distances[j][k];
					}
					node1->obj = &((MTentry *)(*this)[i].Ptr())->object();
					node2->obj = &((MTentry *)(*this)[j].Ptr())->object();

					// perform the split
					int lDel, rDel, *lVec = new int[n], *rVec = new int[n];
					node1->Split(node2, lVec, rVec, &lDel, &rDel);
					// read back the distances held by the trial nodes, keeping the matrix symmetric
					for (int k=0; k<n; k++) {
						distances[k][i] = distances[i][k] = ((MTentry *)(*node1)[k].Ptr())->Key()->distance;
						distances[k][j] = distances[j][k] = ((MTentry *)(*node2)[k].Ptr())->Key()->distance;
					}
					node1->DeleteBulk(lVec, lDel);
					node2->DeleteBulk(rVec, rDel);

					MTentry *entry1 = new MTentry, *entry2 = new MTentry;
					entry1->InitKey();
					entry2->InitKey();
					entry1->SetObject(*node1->obj);
					entry2->SetObject(*node2->obj);
					entry1->SetMaxRadius(0);
					entry2->SetMaxRadius(0);
					entry1->SetMinRadius(MAXDOUBLE);
					entry2->SetMinRadius(MAXDOUBLE);
					node1->mMRadius(entry1);
					node2->mMRadius(entry2);

					// check the result
					double val1 = MAX (entry1->MaxRadius(), entry2->MaxRadius()), val2 = MIN (entry1->MaxRadius(), entry2->MaxRadius());
					if (val1<min || (val1==min && val2<secMin)) {
						min = val1;
						secMin = val2;
						min1 = i;
						min2 = j;
					}

					// be tidy
					delete entry1;
					delete entry2;
					delete []lVec;
					delete []rVec;
					delete node1;
					delete node2;
				}
			}

			if (((MTentry *)(*this)[min2].Ptr())->Key()->distance > 0) {
				newNode = (MTnode *) NCopy ();
			} else {
				newNode = (MTnode *) Copy ();
			}
			obj = &((MTentry *)(*this)[min1].Ptr())->object();
			newNode->obj = &((MTentry *)(*newNode)[min2].Ptr())->object();
			if (((MTentry *)(*this)[min1].Ptr())->Key()->distance > 0) {	// unconfirmed, invalidate also the parent
				InvalidateEntry (TRUE);
				InvalidateEntries ();
			} else {
				InvalidateEntry (FALSE);	// else, invalidate only the node's radii
			}
			for (int i=0; i<n; i++) {
				((MTentry *)(*this)[i].Ptr())->Key()->distance = distances[min1][i];
				((MTentry *)(*newNode)[i].Ptr())->Key()->distance = distances[min2][i];
			}

			for (int i=0; i<n; i++) {
				delete []distances[i];
			}
			delete []distances;
			break;
		}
	}
	return newNode;
}
// load this M-tree with n data using the BulkLoad algorithm [CP98] // data is an array of n entries // padFactor is the maximum node utilization (use 1) // name is the name of the tree void MT::BulkLoad (MTentry **data, int n, double padFactor, const char *name) { int size = 0; if (EntrySize()) { size = n * (sizeof(GiSTpage) + EntrySize()); // (only valid if we've fixed size entries) } else { for (int i=0; i<n; i++) { size += sizeof(GiSTlte) + sizeof(GiSTpage) + data[i]->CompressedLength(); } } int totSize = size + GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte); if (totSize > Store()->PageSize()) { // we need to split the entries into several sub-trees int numEntries = (int)(Store()->PageSize()*padFactor*n) / totSize; int s = (int) MAX (MIN (numEntries, ceil(((float)n)/numEntries)), numEntries*MIN_UTIL); // initial number of samples int nSamples, *samples = new int[s], *sizes = NULL, *ns = NULL, iter = 0, MAXITER = s * s; GiSTlist<double *> *distm = (GiSTlist<double *> *) calloc (s, sizeof(GiSTlist<double *>)); // relative distances between samples int MINSIZE = (int) (Store()->PageSize()*MIN_UTIL), addEntrySize = EntrySize() ? sizeof(GiSTpage) : sizeof(GiSTlte)+sizeof(GiSTpage); GiSTlist<int> *lists = NULL; // set for each sample set GiSTlist<double> *dists = NULL; // set for distance between each sample and its members BOOL *bSampled = new BOOL[n]; // is this entry in the samples set? 
// sampling phase do { iter++; if (iter > 1) { // this is a new sampling phase while (!lists[0].IsEmpty()) { lists[0].RemoveFront (); dists[0].RemoveFront (); } delete []lists; delete []dists; delete []sizes; delete []ns; while (!distm[0].IsEmpty()) { delete []distm[0].RemoveFront(); // empty the distance list } for (int i=1; i<s; i++) { distm[i].front = distm[i].rear = NULL; } } if (iter >= MAXITER) { cout << "Too many loops in BulkLoad!"<<endl<<"Please select a lower minimum node utilization or a bigger node size."<<endl; exit(1); } for (int i=0; i<n; i++) { bSampled[i] = FALSE; } nSamples = 0; // pick s samples to create parents while (nSamples < s) { int i; do { i = PickRandom (0, n); } while (bSampled[i]); bSampled[i] = TRUE; samples[nSamples++] = i; } lists = new GiSTlist<int>[s]; dists = new GiSTlist<double>[s]; sizes = new int[s]; ns = new int[s]; for (int i=0; i<s; i++) { sizes[i] = GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte); ns[i] = 1; distm[i].Prepend (new double[s]); } // compute the relative distances between samples for (int i=0; i<s; i++) { for (int j=0; j<i; j++) { distm[j].front->entry[i] = distm[i].front->entry[j] = data[samples[j]]->object().distance(data[samples[i]]->object()); } distm[i].front->entry[i] = 0; } // assign each entry to its nearest parent for (int i=0; i<n; i++) { if (bSampled[i]) { int j = 0; for (; samples[j]!=i; j++); // find this entry in the samples set and return position in it lists[j].Prepend (i); // insert the entry in the right sample dists[j].Prepend (0); // distance between sample and data[i] sizes[j] += addEntrySize + data[i]->CompressedLength(); } else { // here we optimize the distance computations (like we do in the insert algorithm) double *dist = new double[s]; // distance between this non-sample and samples dist[0] = data[samples[0]]->object().distance(data[i]->object()); int minIndex = 0; for (int j=1; j<s; j++) { // seek the nearest sample dist[j] = -MaxDist(); if (fabs (data[samples[j]]->Key()->distance - 
data[i]->Key()->distance) >= dist[minIndex]) { // pruning continue; } BOOL flag = TRUE; for (int k=0; k<j && flag; k++) { // pruning (other samples) if (dist[k] < 0) { continue; } else { flag = fabs (dist[k] - distm[j].front->entry[k]) < dist[minIndex]; } } if (!flag) { continue; } dist[j] = data[samples[j]]->object().distance(data[i]->object()); // have to compute this distance if (dist[j] < dist[minIndex]) { minIndex = j; } } lists[minIndex].Append (i); // insert the entry in the right sample dists[minIndex].Append (dist[minIndex]); // distance between sample and data[i] sizes[minIndex] += addEntrySize + data[i]->CompressedLength(); ns[minIndex]++; sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist); // correspond with lists } } // redistribute underfilled parents int i; while (sizes[i = FindMin (sizes, nSamples)] < MINSIZE) { GiSTlist<int> list = lists[i]; // each sample set while (!dists[i].IsEmpty()) { // clear distance between each sample and its members dists[i].RemoveFront (); } // substitute this set with last set for (int j=0; j<nSamples; j++) { for (GiSTlistnode<double *> *node=distm[j].front; node; node=node->next) { node->entry[i] = node->entry[nSamples-1]; } } GiSTlist<double *> dlist = distm[i]; // relative distances between sample[i] and other samples, reposition by myself distm[i] = distm[nSamples-1]; lists[i] = lists[nSamples-1]; dists[i] = dists[nSamples-1]; samples[i] = samples[nSamples-1]; sizes[i] = sizes[nSamples-1]; ns[i] = ns[nSamples-1]; nSamples--; while (!list.IsEmpty()) { // assign each entry to its nearest parent double *dist = dlist.RemoveFront (); // relative distances between sample[i] (old) and other samples (old) int minIndex = -1; for (int j=0; j<nSamples && minIndex<0; j++) { // search for a computed distance if (dist[j] > 0) { minIndex = j; } } int k = list.RemoveFront (); if (minIndex < 0) { // no distance was computed (i.e. 
all distances were pruned) dist[0] = data[samples[0]]->object().distance(data[k]->object()); minIndex = 0; } for (int j=0; j<nSamples; j++) { if (j == minIndex) { continue; } if (dist[j] < 0) { // distance wasn't computed if (fabs (data[samples[j]]->Key()->distance - data[k]->Key()->distance) >= dist[minIndex]) { continue; // pruning } BOOL flag = TRUE; for (int i=0; i<j && flag; i++) { // pruning (other samples) if (dist[i] < 0) { continue; } else { flag = fabs (dist[i] - distm[j].front->entry[i]) < dist[minIndex]; } } if (!flag) { continue; } dist[j] = data[samples[j]]->object().distance(data[k]->object()); // have to compute this distance } if (dist[j] < dist[minIndex]) { minIndex = j; } } lists[minIndex].Append (k); dists[minIndex].Append (dist[minIndex]); sizes[minIndex] += addEntrySize + data[k]->CompressedLength(); ns[minIndex]++; sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist); // correspond with lists } assert (dlist.IsEmpty()); // so is the list } } while (nSamples == 1); // if there's only one child, repeat the sampling phase MTentry ***array = new MTentry **[nSamples]; // array of the entries for each sub-tree for (int i=0; i<nSamples; i++) { // convert the lists into arrays array[i] = new MTentry *[ns[i]]; for (int j=0; j<ns[i]; j++) { array[i][j] = (MTentry *) data[lists[i].RemoveFront ()]->Copy(); array[i][j]->Key()->distance = dists[i].RemoveFront (); } assert (lists[i].IsEmpty()); assert (dists[i].IsEmpty()); } delete []lists; delete []dists; delete []sizes; delete []bSampled; for (int i=0; i<nSamples; i++) { while (!distm[i].IsEmpty()) { delete [](distm[i].RemoveFront()); } } free (distm); // build an M-tree under each parent int nInit = nSamples; MT *subtree = new MT; GiSTlist<char *> subtreeNames; // list of the subtrees names GiSTlist<MTentry *> topEntries; // list of the parent entries of each subtree int nCreated = 0, minHeight = MAXINT; char newName[50]; for (int i=0; i<nInit; i++) { sprintf (newName, "%s.%i", 
name, ++nCreated); unlink (newName); subtree->Create(newName); // create the new subtree subtree->BulkLoad(array[i], ns[i], padFactor, newName); // build the subtree GiSTpath path; path.MakeRoot (); MTnode *subtreeRoot = (MTnode *) subtree->ReadNode(path); if (subtreeRoot->IsUnderFull(*Store())) { // if the subtree root node is underfilled, we have to split the tree GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>; GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, subtree->TreeHeight()-1, parentEntries, name); // split the tree nSamples--; while (!newTreeNames->IsEmpty()) { // insert all the new trees in the subtrees list subtreeNames.Append (newTreeNames->RemoveFront()); MTentry *entry = parentEntries->RemoveFront(); for (int j=0; j<n; j++) { if (data[j]->object() == entry->object()) { // append the parent entry to the list topEntries.Append (data[j]); break; } } delete entry; nSamples++; } delete newTreeNames; delete parentEntries; minHeight = MIN (minHeight, subtree->TreeHeight()-1); } else { subtreeNames.Append (strdup(newName)); topEntries.Append (data[samples[i]]); minHeight = MIN (minHeight, subtree->TreeHeight()); } delete subtreeRoot; subtree->Close(); delete subtree->Store(); // it was created in subtree->Create() } delete []samples; for (int i=0; i<nInit; i++) { for (int j=0; j<ns[i]; j++) { delete array[i][j]; } delete []array[i]; } delete []array; delete []ns; // fix the subtree height GiSTlist<char *> subtreeNames2; // list of the subtrees names GiSTlist<MTentry *> topEntries2; // list of the parent entries of each subtree while (!topEntries.IsEmpty()) { // insert the trees in the list (splitting trees if necessary) MTentry *parentEntry = topEntries.RemoveFront (); char *tmp = subtreeNames.RemoveFront (); strcpy (newName, tmp); delete []tmp; subtree->Open(newName); if (subtree->TreeHeight() > minHeight) { // we have to split the tree to reduce its height nSamples--; GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>; 
GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, minHeight, parentEntries, name); // split the tree while (!newTreeNames->IsEmpty()) { // insert all the new trees in the subtrees list subtreeNames2.Append (newTreeNames->RemoveFront()); MTentry *entry = parentEntries->RemoveFront(); for (int j=0; j<n; j++) { if (data[j]->object() == entry->object()) { // append the parent entry to the parents list topEntries2.Append (data[j]); break;; } } delete entry; nSamples++; } delete newTreeNames; delete parentEntries; } else { // simply insert the tree and its parent entry to the lists subtreeNames2.Append (strdup(newName)); topEntries2.Append (parentEntry); } subtree->Close(); delete subtree->Store(); // it was created in tree->Open() } // build the super tree upon the parents MTentry **topEntrArr = new MTentry *[nSamples]; // array of the parent entries for each subtree char **subNameArr = new char *[nSamples]; // array of the subtrees names for (int i=0; i<nSamples; i++) { // convert the lists into arrays topEntrArr[i] = topEntries2.RemoveFront (); subNameArr[i] = subtreeNames2.RemoveFront (); } assert (topEntries2.IsEmpty()); assert (subtreeNames2.IsEmpty()); sprintf (newName, "%s.0", name); BulkLoad (topEntrArr, nSamples, padFactor, newName); // attach each subtree to the leaves of the super tree GiSTpath path; path.MakeRoot (); MTnode *node = (MTnode *) ReadNode (path); GiSTlist<MTnode *> *oldList = new GiSTlist<MTnode *>; // upper level nodes oldList->Append(node); int level = node->Level(); while (level > 0) { // build the leaves list for super tree GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; // lower level nodes while (!oldList->IsEmpty()) { node = oldList->RemoveFront(); path = node->Path(); node->SetLevel(node->Level() + minHeight); // update level of the upper nodes of the super tree WriteNode (node); for (int i=0; i<node->NumEntries(); i++) { MTentry *entry = (MTentry *) (*node)[i].Ptr(); path.MakeChild (entry->Ptr()); 
newList->Append((MTnode *)ReadNode(path)); path.MakeParent (); } delete node; } delete oldList; oldList = newList; level--; } while (!oldList->IsEmpty()) { // attach each subtree to its leaf node = oldList->RemoveFront(); // retrieve next leaf (root of subtree) node->SetLevel(minHeight); // update level of the root of the subtree path = node->Path(); for (int i=0; i<node->NumEntries(); i++) { MTentry *entry = (MTentry *) (*node)[i].Ptr(); path.MakeChild(Store()->Allocate()); MTnode *newNode = (MTnode *) CreateNode (); newNode->Path() = path; entry->SetPtr(path.Page()); path.MakeParent (); int j = 0; for (; entry->object() != topEntrArr[j]->object(); j++); // search the position to append subtree->Open(subNameArr[j]); GiSTpath rootPath; rootPath.MakeRoot (); Append (newNode, (MTnode *)subtree->ReadNode(rootPath)); // append this subtree to the super tree subtree->Close(); delete subtree->Store(); // it was created in tree->Open() delete newNode; } WriteNode (node); delete node; } subtree->Open(subNameArr[0]); // in order to destroy the object tree delete subtree; for (int i=0; i<nSamples; i++) { delete []subNameArr[i]; } delete []subNameArr; delete []topEntrArr; // update radii of the upper nodes of the result M-tree path.MakeRoot (); node = (MTnode *) ReadNode (path); oldList->Append(node); level = node->Level(); while (level >= minHeight) { // build the list of the nodes which radii should be recomputed GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>; while (!oldList->IsEmpty()) { node = oldList->RemoveFront(); path = node->Path(); for (int i=0; i<node->NumEntries(); i++) { path.MakeChild ((*node)[i].Ptr()->Ptr()); newList->Append((MTnode *)ReadNode(path)); path.MakeParent (); } delete node; } delete oldList; oldList = newList; level--; } while (!oldList->IsEmpty()) { // adjust the radii of the nodes MTnode *node = oldList->RemoveFront(); AdjKeys (node); delete node; } delete oldList; for (int i=0; i<=nCreated; i++) { // delete all temporary subtrees sprintf 
(newName, "%s.%i", name, i); unlink (newName); } } else { // we can insert all the entries in a single node GiSTpath path; path.MakeRoot (); GiSTnode *node = ReadNode (path); for (int i=0; i<n; i++) { node->Insert(*(data[i])); } assert (!node->IsOverFull(*Store())); WriteNode (node); delete node; } }