Example #1
0
//TODO: what if pre or neighbor not exist in Query? how to ensure the containment?!
void 
Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbor_id, const char _type)
{
	//switch(_type)
	//{
		//case Util::EDGE_IN:
			//break;
		//case Util::EDGE_OUT:
			//break;
		//default:
			//cout<<"error in Signature::encodeEdge2Entity() - non seen type"<<endl;
			//break;
	//}

	int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM;
	if (_type == Util::EDGE_OUT)
	{
		seed_num += Signature::ENTITY_SIG_INTERVAL_HALF;
	}
	if (Util::is_literal_ele(_neighbor_id))
	{
		seed_num += Signature::EDGE_SIG_INTERVAL_NUM;
	}

	int base = Signature::ENTITY_SIG_INTERVAL_BASE * seed_num;
	int seed = _pre_id * 5003 % 49957;
	int pos = (seed % Signature::PRE_SIG_BASE) + base;
	_entity_bs.set(pos);
	base += Signature::PRE_SIG_BASE;
	seed = _neighbor_id * 5003 % 49957;
	pos = (seed % Signature::STR_SIG_LENGTH) + base;
	_entity_bs.set(pos);
}
Example #2
0
void
Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, const char _type)
{
	//NOTICE:this not used now
	if (Signature::PREDICATE_ENCODE_METHOD == 0)
	{
		//WARN:change if need to use again, because the encoding method has changed now!
		unsigned pos = ((_pre_id + 10) % Signature::EDGE_SIG_LENGTH) + Signature::STR_SIG_LENGTH;
		_entity_bs.set(pos);
	}
	else
	{
		//NOTICE: in * maybe the unsigned will overflow
		long long id = _pre_id;
		unsigned seed_num = id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;

		//int pos = Signature::STR_SIG_LENGTH;
		if (_type == Util::EDGE_OUT)
		{
			seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF;
			//pos += Signature::EDGE_SIG_IN;
		}

		//unsigned primeSize = 5;
		//unsigned prime1[]={5003,5009,5011,5021,5023};
		//unsigned prime2[]={49943,49957,49991,49993,49999};

		//NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
		// when the data set is big enough, cutting down the size of candidate list should come up to our primary consideration.
		// in this case we should not encode too many ones in entities' signature.
		// also, when the data set is small, hash conflicts can hardly happen.
		// therefore, I think using 2 primes(set up two ones in bitset) is enough.
		// --by hanshuo.
		//unsigned primeSize = 2;
		//unsigned prime1[] = {5003, 5011};
		//unsigned prime2[] = {49957, 49993};

		//for(unsigned i = 0; i < primeSize; i++)
		//{
		//unsigned seed = _pre_id * prime1[i] % prime2[i];
		//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
		//_entity_bs.set(pos);
		//}
		//unsigned seed = id * 5003 % 49957;
		//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
		//_entity_bs.set(pos);

		long long seed = id * 5003 % 49957;
		seed = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
		_entity_bs.set(seed);
	}
}
Example #3
0
//NOTICE: no need to encode itself because only variable in query need to be filtered!
//So only consider all neighbors!
void 
Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs) 
{
	//_str is subject or object or literal
    if(strlen(_str) >0 && _str[0] == '?')
        return;

    int length = (int)strlen(_str);
    unsigned int hashKey = 0;
    unsigned int pos = 0;
    char *str2 = (char*)calloc(length + 1, sizeof(char));
    strcpy(str2, _str);
    char *str = str2;

	unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
	for(int i = Signature::HASH_NUM - 1; i >= 0; --i)
	{
		HashFunction hf = Util::hash[i];
		if(hf == NULL)
			break;
		hashKey = hf(str);
		str=str2;
		pos = base + hashKey % Signature::STR_SIG_BASE;
		base -= Signature::STR_SIG_BASE;
		if(_str[0] == '"')
		{
			pos += Signature::STR_SIG_LENGTH2;
		}
		else if(_str[0] != '<')
		{
#ifdef DEBUG_VSTREE
			cerr << "error in encodeStr2Entity(): neighbor is neither a literal or entity!" << endl;
#endif
		}
		_entity_bs.set(pos);
	}
	//BETTER: use multiple threads for different hash functions

#ifdef DEBUG_VSTREE
        //std::stringstream _ss;
        //_ss << "encodeStr2Entity:" << str2 << endl;
        //Util::logging(_ss.str());
#endif
		free(str2);
}
Example #4
0
void 
Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type)
{
	Signature::encodePredicate2Entity(_entity_bs, _pre_id, _type);
	
#ifdef DEBUG
	//if(_neighbor_id == 438460)
	//{
		//cout<<"predicate encoded"<<endl;
	//}
#endif

	Signature::encodeStr2Entity(_entity_bs, _neighbor_id, _type);

//	for(int i = 800; i < _entity_bs.size(); i++){
//		_entity_bs.set(i);
//	} 
   //encode predicate and entity together
    int x = _pre_id % Signature::STR_AND_EDGE_INTERVAL_BASE;
    int y = _neighbor_id % Signature::STR_AND_EDGE_INTERVAL_BASE;
    int seed = x + (x + y + 1) * (x + y) / 2;
    seed %= Signature::STR_AND_EDGE_INTERVAL_BASE;
    seed = seed + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_LENGTH;
    if(Util::is_literal_ele(_neighbor_id))
	{
		seed += (Signature::STR_AND_EDGE_INTERVAL_BASE * 2);
	}
	else //entity part
	{
		//entity can be in edge or out edge
		if (_type == Util::EDGE_OUT)
		{
			seed += Signature::STR_AND_EDGE_INTERVAL_BASE;
		}
	}
	_entity_bs.set(seed);
}
Example #5
0
//NOTICE: no need to encode itself because only variable in query need to be filtered!
//So only consider all neighbors!
void
Signature::encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type)
{
	//NOTICE: we assume the parameter is always valid(invalid args should not be passed here)
	long long id = _neighbor_id;
	//NOTICE: in * maybe the unsigned will overflow
	//long long seed = id * 5003 % 49957;
	//seed = seed % Signature::STR_SIG_INTERVAL_BASE;
	//seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE;

	int seed = _neighbor_id % Signature::STR_SIG_LITERAL;

	if(Util::is_literal_ele(_neighbor_id))
	{
		seed += Signature::STR_SIG_ENTITY;
	}
	else //entity part
	{
		//entity can be in edge or out edge
		if (_type == Util::EDGE_OUT)
		{
			seed += Signature::STR_SIG_LITERAL;
		}
	}

	//if(_neighbor_id == 438460)
	//{
		//cout<<_neighbor_id<<" "<<seed<<endl;
	//}

	_entity_bs.set(seed);

	//_str is subject or object or literal
	//if (strlen(_str) >0 && _str[0] == '?')
		//return;
	//unsigned length = (unsigned)strlen(_str);
	//unsigned unsigned hashKey = 0;
	//unsigned unsigned pos = 0;
	//char *str2 = (char*)calloc(length + 1, sizeof(char));
	//strcpy(str2, _str);
	//char *str = str2;
	//unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
	//for (unsigned i = Signature::HASH_NUM - 1; i >= 0; --i)
	//{
		//HashFunction hf = Util::hash[i];
		//if (hf == NULL)
			//break;
		//hashKey = hf(str);
		//str = str2;
		//pos = base + hashKey % Signature::STR_SIG_BASE;
		//base -= Signature::STR_SIG_BASE;
		//if (_str[0] == '"')
		//{
			//pos += Signature::STR_SIG_LENGTH2;
		//}
		//else if (_str[0] != '<')
		//{
//#ifdef DEBUG_VSTREE
			//cerr << "error in encodeStr2Entity(): neighbor is neither a literal or entity!" << endl;
//#endif
		//}
		//_entity_bs.set(pos);
	//}
	//BETTER: use multiple threads for different hash functions

#ifdef DEBUG_VSTREE
	//std::stringstream _ss;
	//_ss << "encodeStr2Entity:" << str2 << endl;
	//Util::logging(_ss.str());
#endif
	//free(str2);
}