Ejemplo n.º 1
0
// Retrieve the candidate result set for every variable of every BasicQuery
// contained in _query, using each variable's signature.
//
// For each variable index the variable's EntityBitSet and the address of its
// candidate IDList are handed to retrieveEntity() (presumably filling that
// list -- retrieveEntity is defined elsewhere). As soon as a non-literal
// variable ends up with an empty candidate list, the remaining variables of
// that basic query are skipped; the next basic query is still processed.
void VSTree::retrieve(SPARQLquery& _query)
{
	Util::logging("IN retrieve");

	// Disabled debugging aid: dumps the signatures on the path from the leaf
	// holding entity 473738 up to the root.
//	{
//	    VNode* temp_ptr = this->getLeafNodeByEntityID(473738);
//	    stringstream _ss;
//
//	    for (int i=0;i<temp_ptr->getChildNum();i++)
//	        if (temp_ptr->getChildEntry(i).getEntityId() == 473738)
//	        {
//	            _ss << "entity id=473738 entry sig:" << endl;
//	            _ss << "entity id=473738 leaf node line: " << temp_ptr->getFileLine() << endl;
//	            _ss << Signature::BitSet2str(temp_ptr->getChildEntry(i).getEntitySig().entityBitSet) << endl;
//	            break;
//	        }
//
//	    _ss << "leaf node sig:" << endl;
//	    _ss << Signature::BitSet2str(temp_ptr->getEntry().getEntitySig().entityBitSet) << endl;
//
//	    temp_ptr = temp_ptr->getFather(*(this->node_buffer));
//	    while (temp_ptr != NULL)
//	    {
//	        _ss << "line=" << temp_ptr->getFileLine() << endl;
//	        _ss << Signature::BitSet2str(temp_ptr->getEntry().getEntitySig().entityBitSet) << endl;
//	        temp_ptr = temp_ptr->getFather(*(this->node_buffer));
//	    }
//	    Util::logging(_ss.str());
//	}

	std::vector<BasicQuery*>& basicQueries = _query.getBasicQueryVec();

	// Walk every basic query and look up the candidates of each of its variables.
	for (std::vector<BasicQuery*>::iterator queryIt = basicQueries.begin();
	     queryIt != basicQueries.end(); ++queryIt)
	{
		BasicQuery* const basicQuery = *queryIt;
		const int varCount = basicQuery->getVarNum();

		for (int varIdx = 0; varIdx < varCount; ++varIdx)
		{
			// Trace which variable is being processed.
			{
				std::stringstream traceMsg;
				traceMsg << "retrieve of var: " << varIdx << '\n';
				Util::logging(traceMsg.str());
			}

			const bool isLiteral = basicQuery->isLiteralVariable(varIdx);
			const EntityBitSet& varSignature = basicQuery->getVarBitSet(varIdx);
			IDList* const candidates = &(basicQuery->getCandidateList(varIdx));
			this->retrieveEntity(varSignature, candidates);

#ifdef DEBUG_VSTREE
			{
				std::stringstream statsMsg;
				statsMsg << "total num: " << this->entry_num << '\n';
				statsMsg << "candidate num: " << candidates->size() << '\n';
				statsMsg << '\n';
				statsMsg << "isExist 473738: " << (candidates->isExistID(473738) ? "true" : "false") << '\n';
				statsMsg << "isExist 473472: " << (candidates->isExistID(473472) ? "true" : "false") << '\n';
				statsMsg << "isExist 473473: " << (candidates->isExistID(473473) ? "true" : "false") << '\n';
				Util::logging(statsMsg.str());
			}
#endif

			// A non-literal variable without any candidate ends the work on
			// this basic query.
			const bool noCandidates = (candidates->size() == 0);
			if (noCandidates && !isLiteral)
			{
				break;
			}
		}
	}

	Util::logging("OUT retrieve");
}
Ejemplo n.º 2
0
// Cuts the trees in `roots` at every distance level in `cutoffs` and writes
// the resulting clusters to two files.
//
// For each level one line is appended to each file:
//   clusterName     : " <level> |id id ...|id ...|<orphan id>|...|\n"
//   clusterListName : " <level> <size> <size> ... 1 1 ...\n"
// A cluster is the subtree below the topmost node whose `dist` is below the
// level (or within EPSILON of it). Its ids are the `ID`s of the nodes that
// have no children, and its size is the sum of their `numMembers`. Every
// entry of `orphanNodes` is written as a cluster of size 1 at every level.
// A summary line per level is printed to stdout.
//
// Parameters:
//   roots           - set of tree root pointers; null entries are ignored.
//   orphanNodes     - ids written as singleton clusters.
//   clusterListName - path of the cluster-size output file.
//   clusterName     - path of the cluster-membership output file.
//   cutoffs         - distance levels, processed in the given order.
//
// Returns 1 on success, 0 if either output file could not be opened.
//
// NOTE(review): ids are printed with "%u", as in the original code; this
// assumes the IDList element type is unsigned int -- confirm.
int printClusters(NodeSet roots, IDList orphanNodes,
	string clusterListName, string clusterName, 
	vector<float> cutoffs)
{   
	FILE *clusterListFile = fopen(clusterListName.c_str(),"wb");
	FILE *clusterFile = fopen(clusterName.c_str(),"wb");
	if(clusterListFile == NULL || clusterFile == NULL)
	{   
		// Only one of the two may have failed: release the one that did
		// open so the handle is not leaked.
		if(clusterListFile != NULL)
			fclose(clusterListFile);
		if(clusterFile != NULL)
			fclose(clusterFile);
		cout << "Cannot open output files. Skipped" << endl;
		return 0;
	}
	printf("\n");

	NodeList nodeList;   // cluster roots found for the current level
	NodeList tempList;   // explicit stack used by both tree walks
	IDList OTU;          // ids of the cluster currently being written

	for(vector<float>::iterator c = cutoffs.begin(); c != cutoffs.end(); ++c)
	{   
		const float distLevel = *c;
		unsigned int numOTUs = 0;
		nodeList.clear();
		
		// extract the valid nodes for each distance level
		for(NodeSetIter setIter=roots.begin(); setIter!=roots.end(); ++setIter)
		{   
			if(*setIter == 0)
				continue;

			// The whole tree already lies within the level: one cluster.
			if((*setIter)->dist < distLevel || fabs((*setIter)->dist-distLevel) < EPSILON)
			{   
				nodeList.push_front(*setIter);
				continue;
			}

			// Otherwise descend until every branch is within the level.
			tempList.push_front(*setIter);
			while(!tempList.empty())
			{   
				TreeNode *tempNode = *tempList.begin();
				tempList.pop_front();

				// A childless node cannot be split any further; keep it as
				// its own cluster instead of dereferencing null children.
				if(tempNode->left == 0 && tempNode->right == 0)
				{   
					nodeList.push_front(tempNode);
					continue;
				}

				// Left is handled before right to keep the original
				// output order.
				TreeNode *children[2] = { tempNode->left, tempNode->right };
				for(int k = 0; k < 2; ++k)
				{   
					TreeNode *child = children[k];
					if(child == 0)
						continue;
					if(child->dist < distLevel || fabs(child->dist-distLevel) < EPSILON)
						nodeList.push_front(child);
					else
						tempList.push_front(child);
				}
			}
		}

		fprintf(clusterListFile," %.6f ", distLevel);
		fprintf(clusterFile," %.6f ", distLevel);
		
		// write the nodeList to file
		for(NodeListIter nodeIt=nodeList.begin(); nodeIt!=nodeList.end(); ++nodeIt)
		{   
			fprintf(clusterFile,"|");
			unsigned int size = 0;
			OTU.clear();
			
			// Collect the childless nodes below this cluster root. The
			// right child is pushed first so the left one is visited first.
			tempList.push_front(*nodeIt);
			while(!tempList.empty())
			{   
				TreeNode *tempNode = *tempList.begin();
				tempList.pop_front();
				
				if(tempNode->left==0 && tempNode->right==0)
				{   
					OTU.push_back(tempNode->ID);
					size+=tempNode->numMembers;
				}				
				if(tempNode->right!=0)
					tempList.push_front(tempNode->right);
				if(tempNode->left!=0)
					tempList.push_front(tempNode->left);
			}

			// print to clusterFile: ids separated by single spaces
			IDListIter it = OTU.begin();
			if(it != OTU.end())
			{   
				fprintf(clusterFile,"%u",(*it));
				for(++it; it!=OTU.end(); ++it)
					fprintf(clusterFile," %u",(*it));
			}
			
			// size is unsigned, so it must be printed with %u, not %d
			fprintf(clusterListFile, "%u ", size);	
			++numOTUs;			
		}
		
		for(IDListIter it=orphanNodes.begin(); it != orphanNodes.end(); ++it)
		{   
			fprintf(clusterFile,"|%u",(*it));
			fprintf(clusterListFile, "1 ");
		}
		numOTUs += static_cast<unsigned int>(orphanNodes.size());
		
		fprintf(clusterFile,"|\n");
		fprintf(clusterListFile, "\n");		
		// size() returns size_t; cast so the argument matches %lu on every
		// platform.
		printf("Dist: %.6f. numOTUs: %u. numSingletons: %lu\n", distLevel, numOTUs,
			static_cast<unsigned long>(orphanNodes.size()));
	}
	
	printf("\n");
	fclose(clusterListFile);
	fclose(clusterFile);
	return 1;
}