예제 #1
0
파일: MTnode.cpp 프로젝트: jsc0218/MxTree
void
MTnode::InvalidateEntry (BOOL bNew)
{
	GiSTpath path = Path ();
	if (path.Level() > 1) {  // len>=3
		MTnode *parentNode = ((MT *)Tree())->ParentNode((MTnode *)this);
		for (int i=0; i<parentNode->NumEntries(); i++) {  // search the entry in the parent's node
			MTentry *entry = (MTentry *) (*parentNode)[i].Ptr();
			if (entry->Ptr() == path.Page()) {
				if (bNew) {
					entry->Key()->distance = -MaxDist();
				}
				entry->Key()->splitted = TRUE;
				break;
			}
		}
		path.MakeParent ();
		MTnode *grandNode = ((MT *)Tree())->ParentNode(parentNode);
		for (int i=0; i<grandNode->NumEntries(); i++) {  // search the entry in the grandparent's node
			MTentry *entry = (MTentry *) (*grandNode)[i].Ptr();
			if (entry->Ptr() == path.Page()) {
				entry->SetMaxRadius(-1);
				break;
			}
		}
		((MT *)Tree())->WriteNode(parentNode);  // write parent node (in inconsistent state)
		((MT *)Tree())->WriteNode(grandNode);  // write grandparent node (to invalidate the parent's entry)
		delete parentNode;
		delete grandNode;
	}
}
예제 #2
0
파일: GiST.cpp 프로젝트: voidcycles/m3
void
GiST::ShortenTree()
{
	GiSTpath path;
	// Shorten the tree if necessary (This should only be done if root actually changed!)
	path.MakeRoot();
	GiSTnode *root=ReadNode(path);

	if(!root->IsLeaf()&&root->NumEntries()==1) {
		path.MakeChild((*root)[0]->Ptr());
		GiSTnode *child=ReadNode(path);

		store->Deallocate(path.Page());
		child->SetSibling(0);
		child->Path().MakeRoot();
		WriteNode(child);
		delete child;
	}
	delete root;
}
예제 #3
0
파일: MXTree.cpp 프로젝트: jsc0218/MxTree
void MXTree::Split(GiSTnode **node, const GiSTentry& entry)
{
	double radii[2], dist, *dists = new double[(*node)->NumEntries()*2];
	int pageNums[2], cands[2];
	vector<vector<int>> vec(2);
	((MXTnode *)(*node))->TestPromotion(radii, &dist, pageNums, cands, dists, vec);
	if (Trade((*node)->Path().IsRoot(), radii, dist, pageNums, ((MXTnode *)(*node))->GetPageNum()+1, (*node)->NumEntries())) {
		// don't split now
		delete[] dists;
		GiSTpath oldPath = (*node)->Path();

		int startPage = ((*node)->Path().IsRoot() ? rootPage : (*node)->Path().Page());
		int pageNum = ((MXTnode *)(*node))->GetPageNum();
		((MXTfile *)store)->Deallocate(startPage, pageNum);
		startPage = ((MXTfile *)store)->Allocate(++pageNum);
		(*node)->Path().MakeSibling(startPage);
		rootPage = ((*node)->Path().IsRoot() ? startPage : rootPage);
		((MXTnode *)(*node))->SetPageNum(pageNum);
		WriteNode(*node);

		if (!(*node)->Path().IsRoot() && startPage != oldPath.Page()) {
			GiSTpath parentPath = oldPath;
			parentPath.MakeParent();
			GiSTnode *parentNode = ReadNode(parentPath);
			GiSTentry *e = parentNode->SearchPtr(oldPath.Page());
			assert(e != NULL);
			int pos = e->Position();
			e->SetPtr(startPage);
			parentNode->DeleteEntry(pos);
			parentNode->InsertBefore(*e, pos);
			WriteNode(parentNode);
			delete parentNode;
			delete e;
		}
	} else {
		// split now
		bool bLeft = false, bNewRoot = false;

		if ((*node)->Path().IsRoot()) {
			bNewRoot = true;
			(*node)->Path().MakeChild(rootPage);
			rootPage = store->Allocate();
		}

		int oldPageNum = ((MXTnode *)(*node))->GetPageNum();
		GiSTnode *node2 = ((MXTnode *)(*node))->PickSplit(cands, dists, vec);
		delete[] dists;
		int curPageNum = ((MXTnode *)(*node))->GetPageNum();
		assert(oldPageNum >= curPageNum);
		if (oldPageNum > curPageNum) {
			((MXTfile *)store)->Deallocate((*node)->Path().Page()+curPageNum, oldPageNum-curPageNum);
		}
		node2->Path().MakeSibling(((MXTfile *)store)->Allocate(((MXTnode *)node2)->GetPageNum()));

		WriteNode(*node);
		WriteNode(node2);
	
		GiSTentry *e = (*node)->SearchPtr(entry.Ptr());
		if (e != NULL) {
			bLeft = true;
			delete e;
		}
	
		GiSTentry *e1 = (*node)->Union();
		GiSTentry *e2 = node2->Union();
	
		e1->SetPtr((*node)->Path().Page());
		e2->SetPtr(node2->Path().Page());
		// Create new root if root is being split
		if (bNewRoot) {
			GiSTnode *root = NewNode(this);
			root->SetLevel((*node)->Level() + 1);
			root->InsertBefore(*e1, 0);
			root->InsertBefore(*e2, 1);
			root->Path().MakeRoot();
			WriteNode(root);
			delete root;
		} else {
			// Insert entry for N' in parent
			GiSTpath parentPath = (*node)->Path();
			parentPath.MakeParent();
			GiSTnode *parent = ReadNode(parentPath);
			// Find the entry for N in parent
			GiSTentry *e = parent->SearchPtr((*node)->Path().Page());
			assert(e != NULL);
			// Insert the new entry right after it
			int pos = e->Position();
			parent->DeleteEntry(pos);
			parent->InsertBefore(*e1, pos);
			parent->InsertBefore(*e2, pos+1);
			delete e;
			if (!parent->IsOverFull(*store)) {
				WriteNode(parent);
			} else {
				Split(&parent, bLeft? *e1: *e2);  // parent is the node which contains the entry inserted
				GiSTpage page = (*node)->Path().Page();
				(*node)->Path() = parent->Path();  // parent's path may change
				(*node)->Path().MakeChild(page);
				page = node2->Path().Page();
				node2->Path() = (*node)->Path();
				node2->Path().MakeSibling(page);
			}
			delete parent;
		}
		if (!bLeft) {
			delete *node;
			*node = node2;  // return it
		} else {
			delete node2;
		}
		delete e1;
		delete e2;
	}
}
예제 #4
0
파일: BulkLoad.cpp 프로젝트: jsc0218/MxTree
// load this M-tree with n data using the BulkLoad algorithm [CP98]
// data is an array of n entries
// padFactor is the maximum node utilization (use 1)
// name is the name of the tree
void
MT::BulkLoad (MTentry **data, int n, double padFactor, const char *name)
{
	int size = 0;
	if (EntrySize()) {
		size = n * (sizeof(GiSTpage) + EntrySize());  // (only valid if we've fixed size entries)
	} else {
		for (int i=0; i<n; i++) {
			size += sizeof(GiSTlte) + sizeof(GiSTpage) + data[i]->CompressedLength();
		}
	}
	int totSize = size + GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte);

	if (totSize > Store()->PageSize()) {  // we need to split the entries into several sub-trees
		int numEntries = (int)(Store()->PageSize()*padFactor*n) / totSize;
		int s = (int) MAX (MIN (numEntries, ceil(((float)n)/numEntries)), numEntries*MIN_UTIL);  // initial number of samples
		int nSamples, *samples = new int[s], *sizes = NULL, *ns = NULL, iter = 0, MAXITER = s * s;
		GiSTlist<double *> *distm = (GiSTlist<double *> *) calloc (s, sizeof(GiSTlist<double *>));  // relative distances between samples
		int MINSIZE = (int) (Store()->PageSize()*MIN_UTIL), addEntrySize = EntrySize() ? sizeof(GiSTpage) : sizeof(GiSTlte)+sizeof(GiSTpage);
		GiSTlist<int> *lists = NULL;  // set for each sample set
		GiSTlist<double> *dists = NULL;  // set for distance between each sample and its members
		BOOL *bSampled = new BOOL[n];  // is this entry in the samples set?

		// sampling phase
		do {
			iter++;
			if (iter > 1) {  // this is a new sampling phase
				while (!lists[0].IsEmpty()) {
					lists[0].RemoveFront ();
					dists[0].RemoveFront ();
				}
				delete []lists;
				delete []dists;
				delete []sizes;
				delete []ns;
				while (!distm[0].IsEmpty()) {
					delete []distm[0].RemoveFront();  // empty the distance list
				}
				for (int i=1; i<s; i++) {
					distm[i].front = distm[i].rear = NULL;
				}
			}
			if (iter >= MAXITER) {
				cout << "Too many loops in BulkLoad!"<<endl<<"Please select a lower minimum node utilization or a bigger node size."<<endl;
				exit(1);
			}

			for (int i=0; i<n; i++) {
				bSampled[i] = FALSE;
			}
			nSamples = 0;
			// pick s samples to create parents
			while (nSamples < s) {
				int i;
				do {
					i = PickRandom (0, n);
				} while (bSampled[i]);
				bSampled[i] = TRUE;
				samples[nSamples++] = i;
			}

			lists = new GiSTlist<int>[s];
			dists = new GiSTlist<double>[s];
			sizes = new int[s];
			ns = new int[s];
			for (int i=0; i<s; i++) {
				sizes[i] = GIST_PAGE_HEADER_SIZE + sizeof(GiSTlte);
				ns[i] = 1;
				distm[i].Prepend (new double[s]);
			}

			// compute the relative distances between samples
			for (int i=0; i<s; i++) {
				for (int j=0; j<i; j++) {
					distm[j].front->entry[i] = distm[i].front->entry[j] = data[samples[j]]->object().distance(data[samples[i]]->object());
				}
				distm[i].front->entry[i] = 0;
			}

			// assign each entry to its nearest parent
			for (int i=0; i<n; i++) {
				if (bSampled[i]) {
					int j = 0;
					for (; samples[j]!=i; j++);  // find this entry in the samples set and return position in it
					lists[j].Prepend (i);  // insert the entry in the right sample
					dists[j].Prepend (0);  // distance between sample and data[i]
					sizes[j] += addEntrySize + data[i]->CompressedLength();
				} else {  // here we optimize the distance computations (like we do in the insert algorithm)
					double *dist = new double[s];  // distance between this non-sample and samples
					dist[0] = data[samples[0]]->object().distance(data[i]->object());
					int minIndex = 0;
					for (int j=1; j<s; j++) {  // seek the nearest sample
						dist[j] = -MaxDist();
						if (fabs (data[samples[j]]->Key()->distance - data[i]->Key()->distance) >= dist[minIndex]) {  // pruning
							continue;
						}
						BOOL flag = TRUE;
						for (int k=0; k<j && flag; k++) {  // pruning (other samples)
							if (dist[k] < 0) {
								continue;
							} else {
								flag = fabs (dist[k] - distm[j].front->entry[k]) < dist[minIndex];
							}
						}
						if (!flag) {
							continue;
						}
						dist[j] = data[samples[j]]->object().distance(data[i]->object());  // have to compute this distance
						if (dist[j] < dist[minIndex]) {
							minIndex = j;
						}
					}
					lists[minIndex].Append (i);  // insert the entry in the right sample
					dists[minIndex].Append (dist[minIndex]);  // distance between sample and data[i]
					sizes[minIndex] += addEntrySize + data[i]->CompressedLength();
					ns[minIndex]++;
					sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist);  // correspond with lists
				}
			}

			// redistribute underfilled parents
			int i;
			while (sizes[i = FindMin (sizes, nSamples)] < MINSIZE) {
				GiSTlist<int> list = lists[i];  // each sample set
				while (!dists[i].IsEmpty()) {  // clear distance between each sample and its members
					dists[i].RemoveFront ();
				}

				// substitute this set with last set
				for (int j=0; j<nSamples; j++) {
					for (GiSTlistnode<double *> *node=distm[j].front; node; node=node->next) {
						node->entry[i] = node->entry[nSamples-1];
					}
				}
				GiSTlist<double *> dlist = distm[i];  // relative distances between sample[i] and other samples, reposition by myself

				distm[i] = distm[nSamples-1];
				lists[i] = lists[nSamples-1];
				dists[i] = dists[nSamples-1];
				samples[i] = samples[nSamples-1];
				sizes[i] = sizes[nSamples-1];
				ns[i] = ns[nSamples-1];
				nSamples--;
				while (!list.IsEmpty()) {  // assign each entry to its nearest parent
					double *dist = dlist.RemoveFront ();  // relative distances between sample[i] (old) and other samples (old)
					int minIndex = -1;
					for (int j=0; j<nSamples && minIndex<0; j++) {  // search for a computed distance
						if (dist[j] > 0) {
							minIndex = j;
						}
					}
					int k = list.RemoveFront ();
					if (minIndex < 0) {  // no distance was computed (i.e. all distances were pruned)
						dist[0] = data[samples[0]]->object().distance(data[k]->object());
						minIndex = 0;
					}
					for (int j=0; j<nSamples; j++) {
						if (j == minIndex) {
							continue;
						}
						if (dist[j] < 0) {  // distance wasn't computed
							if (fabs (data[samples[j]]->Key()->distance - data[k]->Key()->distance) >= dist[minIndex]) {
								continue;  // pruning
							}
							BOOL flag = TRUE;
							for (int i=0; i<j && flag; i++) { // pruning (other samples)
								if (dist[i] < 0) {
									continue;
								} else {
									flag = fabs (dist[i] - distm[j].front->entry[i]) < dist[minIndex];
								}
							}
							if (!flag) {
								continue;
							}
							dist[j] = data[samples[j]]->object().distance(data[k]->object());  // have to compute this distance
						}
						if (dist[j] < dist[minIndex]) {
							minIndex = j;
						}
					}
					lists[minIndex].Append (k);
					dists[minIndex].Append (dist[minIndex]);
					sizes[minIndex] += addEntrySize + data[k]->CompressedLength();
					ns[minIndex]++;
					sizes[minIndex] >= MINSIZE ? delete []dist : distm[minIndex].Append (dist);  // correspond with lists
				}
				assert (dlist.IsEmpty());  // so is the list
			}
		} while (nSamples == 1);  // if there's only one child, repeat the sampling phase
		MTentry ***array = new MTentry **[nSamples];  // array of the entries for each sub-tree
		for (int i=0; i<nSamples; i++) {  // convert the lists into arrays
			array[i] = new MTentry *[ns[i]];
			for (int j=0; j<ns[i]; j++) {
				array[i][j] = (MTentry *) data[lists[i].RemoveFront ()]->Copy();
				array[i][j]->Key()->distance = dists[i].RemoveFront ();
			}
			assert (lists[i].IsEmpty());
			assert (dists[i].IsEmpty());
		}
		delete []lists;
		delete []dists;
		delete []sizes;
		delete []bSampled;
		for (int i=0; i<nSamples; i++) {
			while (!distm[i].IsEmpty()) {
				delete [](distm[i].RemoveFront());
			}
		}
		free (distm);

		// build an M-tree under each parent
		int nInit = nSamples;
		MT *subtree = new MT;
		GiSTlist<char *> subtreeNames;  // list of the subtrees names
		GiSTlist<MTentry *> topEntries;  // list of the parent entries of each subtree
		int nCreated = 0, minHeight = MAXINT;
		char newName[50];
		for (int i=0; i<nInit; i++) {
			sprintf (newName, "%s.%i", name, ++nCreated);
			unlink (newName);
			subtree->Create(newName);  // create the new subtree
			subtree->BulkLoad(array[i], ns[i], padFactor, newName);  // build the subtree

			GiSTpath path;
			path.MakeRoot ();
			MTnode *subtreeRoot = (MTnode *) subtree->ReadNode(path);
			if (subtreeRoot->IsUnderFull(*Store())) {  // if the subtree root node is underfilled, we have to split the tree
				GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>;
				GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, subtree->TreeHeight()-1, parentEntries, name);  // split the tree
				nSamples--;
				while (!newTreeNames->IsEmpty()) {  // insert all the new trees in the subtrees list
					subtreeNames.Append (newTreeNames->RemoveFront());
					MTentry *entry = parentEntries->RemoveFront();
					for (int j=0; j<n; j++) {
						if (data[j]->object() == entry->object()) {  // append the parent entry to the list
							topEntries.Append (data[j]);
							break;
						}
					}
					delete entry;
					nSamples++;
				}
				delete newTreeNames;
				delete parentEntries;
				minHeight = MIN (minHeight, subtree->TreeHeight()-1);
			} else {
				subtreeNames.Append (strdup(newName));
				topEntries.Append (data[samples[i]]);
				minHeight = MIN (minHeight, subtree->TreeHeight());
			}
			delete subtreeRoot;
			subtree->Close();
			delete subtree->Store();  // it was created in subtree->Create()
		}
		delete []samples;
		for (int i=0; i<nInit; i++)  {
			for (int j=0; j<ns[i]; j++) {
				delete array[i][j];
			}
			delete []array[i];
		}
		delete []array;
		delete []ns;

		// fix the subtree height
		GiSTlist<char *> subtreeNames2;  // list of the subtrees names
		GiSTlist<MTentry *> topEntries2;  // list of the parent entries of each subtree
		while (!topEntries.IsEmpty()) {  // insert the trees in the list (splitting trees if necessary)
			MTentry *parentEntry = topEntries.RemoveFront ();
			char *tmp = subtreeNames.RemoveFront ();
			strcpy (newName, tmp);
			delete []tmp;
			subtree->Open(newName);
			if (subtree->TreeHeight() > minHeight) {  // we have to split the tree to reduce its height
				nSamples--;
				GiSTlist<MTentry *> *parentEntries = new GiSTlist<MTentry *>;
				GiSTlist<char *> *newTreeNames = subtree->SplitTree(&nCreated, minHeight, parentEntries, name);  // split the tree
				while (!newTreeNames->IsEmpty()) {  // insert all the new trees in the subtrees list
					subtreeNames2.Append (newTreeNames->RemoveFront());
					MTentry *entry = parentEntries->RemoveFront();
					for (int j=0; j<n; j++) {
						if (data[j]->object() == entry->object()) {  // append the parent entry to the parents list
							topEntries2.Append (data[j]);
							break;;
						}
					}
					delete entry;
					nSamples++;
				}
				delete newTreeNames;
				delete parentEntries;
			} else {  // simply insert the tree and its parent entry to the lists
				subtreeNames2.Append (strdup(newName));
				topEntries2.Append (parentEntry);
			}
			subtree->Close();
			delete subtree->Store();  // it was created in tree->Open()
		}

		// build the super tree upon the parents
		MTentry **topEntrArr = new MTentry *[nSamples];  // array of the parent entries for each subtree
		char **subNameArr = new char *[nSamples];  // array of the subtrees names
		for (int i=0; i<nSamples; i++) {  // convert the lists into arrays
			topEntrArr[i] = topEntries2.RemoveFront ();
			subNameArr[i] = subtreeNames2.RemoveFront ();
		}
		assert (topEntries2.IsEmpty());
		assert (subtreeNames2.IsEmpty());
		sprintf (newName, "%s.0", name);
		BulkLoad (topEntrArr, nSamples, padFactor, newName);
		// attach each subtree to the leaves of the super tree
		GiSTpath path;
		path.MakeRoot ();
		MTnode *node = (MTnode *) ReadNode (path);
		GiSTlist<MTnode *> *oldList = new GiSTlist<MTnode *>;  // upper level nodes
		oldList->Append(node);
		int level = node->Level();
		while (level > 0) {  // build the leaves list for super tree
			GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>;  // lower level nodes
			while (!oldList->IsEmpty()) {
				node = oldList->RemoveFront();
				path = node->Path();
				node->SetLevel(node->Level() + minHeight);  // update level of the upper nodes of the super tree
				WriteNode (node);
				for (int i=0; i<node->NumEntries(); i++) {
					MTentry *entry = (MTentry *) (*node)[i].Ptr();
					path.MakeChild (entry->Ptr());
					newList->Append((MTnode *)ReadNode(path));
					path.MakeParent ();
				}
				delete node;
			}
			delete oldList;
			oldList = newList;
			level--;
		}
		while (!oldList->IsEmpty()) {  // attach each subtree to its leaf
			node = oldList->RemoveFront();  // retrieve next leaf (root of subtree)
			node->SetLevel(minHeight);  // update level of the root of the subtree
			path = node->Path();
			for (int i=0; i<node->NumEntries(); i++) {
				MTentry *entry = (MTentry *) (*node)[i].Ptr();
				path.MakeChild(Store()->Allocate());
				MTnode *newNode = (MTnode *) CreateNode ();
				newNode->Path() = path;
				entry->SetPtr(path.Page());
				path.MakeParent ();
				int j = 0;
				for (; entry->object() != topEntrArr[j]->object(); j++);  // search the position to append
				subtree->Open(subNameArr[j]);
				GiSTpath rootPath;
				rootPath.MakeRoot ();
				Append (newNode, (MTnode *)subtree->ReadNode(rootPath));  // append this subtree to the super tree
				subtree->Close();
				delete subtree->Store();  // it was created in tree->Open()
				delete newNode;
			}
			WriteNode (node);
			delete node;
		}
		subtree->Open(subNameArr[0]);  // in order to destroy the object tree
		delete subtree;
		for (int i=0; i<nSamples; i++) {
			delete []subNameArr[i];
		}
		delete []subNameArr;
		delete []topEntrArr;

		// update radii of the upper nodes of the result M-tree
		path.MakeRoot ();
		node = (MTnode *) ReadNode (path);
		oldList->Append(node);
		level = node->Level();
		while (level >= minHeight) {  // build the list of the nodes which radii should be recomputed
			GiSTlist<MTnode *> *newList = new GiSTlist<MTnode *>;
			while (!oldList->IsEmpty()) {
				node = oldList->RemoveFront();
				path = node->Path();
				for (int i=0; i<node->NumEntries(); i++) {
					path.MakeChild ((*node)[i].Ptr()->Ptr());
					newList->Append((MTnode *)ReadNode(path));
					path.MakeParent ();
				}
				delete node;
			}
			delete oldList;
			oldList = newList;
			level--;
		}
		while (!oldList->IsEmpty()) {  // adjust the radii of the nodes
			MTnode *node = oldList->RemoveFront();
			AdjKeys (node);
			delete node;
		}
		delete oldList;
		for (int i=0; i<=nCreated; i++) {  // delete all temporary subtrees
			sprintf (newName, "%s.%i", name, i);
			unlink (newName);
		}
	} else {  // we can insert all the entries in a single node
		GiSTpath path;
		path.MakeRoot ();
		GiSTnode *node = ReadNode (path);
		for (int i=0; i<n; i++) {
			node->Insert(*(data[i]));
		}
		assert (!node->IsOverFull(*Store()));
		WriteNode (node);
		delete node;
	}
}