Beispiel #1
0
// Hands out the next partition to process.
//
// In split mode (fileMode == false) the request is forwarded to the child
// buckets in order; nextPartitionToReturn remembers which bucket is current
// and only advances once that bucket reports it has nothing more to return.
//
// In file mode this object is itself the (single) partition: on the first
// call every serialized RGData read from stream 0 is appended to *smallData,
// *partitionID receives uniqueID and *jp receives this.  Every later call
// returns false.
//
// Returns true when a partition was produced through the out-parameters,
// false when there are no more partitions.
bool JoinPartition::getNextPartition(vector<RGData> *smallData, uint64_t *partitionID, JoinPartition **jp)
{
	if (!fileMode) {
		// Walk the remaining buckets; stay on a bucket for as long as it
		// keeps producing partitions.
		for (; nextPartitionToReturn < bucketCount; nextPartitionToReturn++) {
			if (buckets[nextPartitionToReturn]->getNextPartition(smallData, partitionID, jp))
				return true;
		}
		return false;
	}

	// File mode: there is exactly one partition, and it was already returned.
	if (nextPartitionToReturn > 0)
		return false;

	ByteStream serialized;
	RGData smallChunk;

	// Restart the small-side read from the beginning; an empty ByteStream
	// marks the end of the data.
	nextSmallOffset = 0;
	for (readByteStream(0, &serialized); serialized.length() != 0; readByteStream(0, &serialized)) {
		smallChunk.deserialize(serialized);
		smallData->push_back(smallChunk);
	}

	nextPartitionToReturn = 1;
	*partitionID = uniqueID;
	*jp = this;
	return true;
}
Beispiel #2
0
// Reads the next serialized RGData from stream 1 (the large-side data).
//
// Returns a pointer to the freshly deserialized RGData, or an empty
// shared_ptr once the stream yields no more bytes.  On that end-of-data
// condition the large-side file is removed and largeSizeOnDisk is reset to 0.
boost::shared_ptr<RGData> JoinPartition::getNextLargeRGData()
{
	ByteStream serialized;
	readByteStream(1, &serialized);

	if (serialized.length() == 0) {
		// Nothing left to read: drop the file and report "no more data".
		boost::filesystem::remove(largeFilename);
		largeSizeOnDisk = 0;
		return boost::shared_ptr<RGData>();
	}

	boost::shared_ptr<RGData> nextChunk(new RGData());
	nextChunk->deserialize(serialized);
	return nextChunk;
}
Beispiel #3
0
 // Reads one persisted value from the input stream and pushes the
 // reconstructed Lua value onto the stack of m_L (Lua 5.1 API: uses
 // lua_getfenv / lua_setfenv / lua_objlen).
 //
 // The stream starts with a variable-length unsigned integer:
 //   * values >= PERSIST_TCOUNT are a back-reference to an object that was
 //     read earlier; it is fetched from the table at stack index 1.
 //   * smaller values are a type tag (LUA_T* or PERSIST_T*) that selects how
 //     the payload following it is decoded.
 //
 // Returns true when a value was pushed, false when a stream read failed or
 // the data was not understood (several paths call setError() first).
 // NOTE(review): saveStackObject() is not visible here; from its use it
 // presumably records the value on top of the stack in the table at index 1
 // under m_iNextIndex and advances m_iNextIndex - confirm.
 virtual bool readStackObject()
 {
     uint64_t iIndex;
     if(!readVUInt(iIndex))
     {
         setError("Expected stack object");
         return false;
     }
     lua_State *L = m_L;
     if(lua_type(L, 1) != LUA_TTABLE)
     {
         // Ensure that index #1 is self environment
         lua_getfenv(L, 1);
         lua_replace(L, 1);
     }
     if(iIndex >= PERSIST_TCOUNT)
     {
         // Back-reference: convert the stream value into a key of the table
         // at index 1 (PERSIST_TCOUNT maps to key 1).
         iIndex += 1 - PERSIST_TCOUNT;
         // lua_rawgeti takes an int key; larger keys go through a number key.
         if(iIndex < (uint64_t)INT_MAX)
             lua_rawgeti(L, 1, (int)iIndex);
         else
         {
             lua_pushnumber(L, (lua_Number)iIndex);
             lua_rawget(L, 1);
         }
         // A nil entry means the referenced slot has not been filled in.
         if(lua_isnil(L, -1))
         {
             setError("Cycle while depersisting permanent object key or userdata metatable");
             return false;
         }
     }
     else
     {
         // Type tag: decode the payload that follows it.
         uint8_t iType = (uint8_t)iIndex;
         switch(iType)
         {
         case LUA_TNIL:
             lua_pushnil(L);
             break;
         case PERSIST_TPERMANENT: {
             // Reserve this object's index before reading its key, then look
             // the key up in the permanents table.
             uint64_t iOldIndex = m_iNextIndex;
             ++m_iNextIndex; // Temporary marker
             lua_rawgeti(L, 1, 0); // Permanents table
             if(!readStackObject())
                 return false;
             // permanents[key], then drop the permanents table from under it
             lua_gettable(L, -2);
             lua_replace(L, -2);
             // Replace marker with actual object
             // (m_iNextIndex is rewound to the reserved value for the save
             // and then restored to where the nested reads left it)
             uint64_t iNewIndex = m_iNextIndex;
             m_iNextIndex = iOldIndex;
             saveStackObject();
             m_iNextIndex = iNewIndex;
             break; }
         case LUA_TBOOLEAN:
             // This tag encodes boolean false; true has its own tag below.
             lua_pushboolean(L, 0);
             break;
         case PERSIST_TTRUE:
             lua_pushboolean(L, 1);
             break;
         case LUA_TSTRING: {
             // Payload: length, then that many raw bytes.
             size_t iLength;
             if(!readVUInt(iLength))
                 return false;
             // Grow the shared scratch buffer by doubling until it fits.
             // NOTE(review): the realloc result is not checked for NULL, and
             // a zero m_iStringBufferLength would never grow - confirm the
             // buffer is initialised to a non-zero size elsewhere.
             while(iLength > m_iStringBufferLength)
             {
                 m_iStringBufferLength *= 2;
                 m_sStringBuffer = (char*)realloc(m_sStringBuffer, m_iStringBufferLength);
             }
             if(!readByteStream((uint8_t*)m_sStringBuffer, iLength))
                 return false;
             lua_pushlstring(L, m_sStringBuffer, iLength);
             saveStackObject();
             break; }
         case LUA_TTABLE:
             // The new table is saved before its contents are read
             // (presumably so nested values can refer back to it).
             lua_newtable(L);
             saveStackObject();
             if(!lua_checkstack(L, 8))
                 return false;
             if(!readTableContents())
                 return false;
             break;
         case PERSIST_TTABLE_WITH_META:
             // As LUA_TTABLE, but a metatable object precedes the contents.
             lua_newtable(L);
             saveStackObject();
             if(!lua_checkstack(L, 8))
                 return false;
             if(!readStackObject())
                 return false;
             lua_setmetatable(L, -2);
             if(!readTableContents())
                 return false;
             break;
         case LUA_TNUMBER: {
             // Payload: the raw bytes of a double, copied as-is.
             double fValue;
             if(!readByteStream(reinterpret_cast<uint8_t*>(&fValue), sizeof(double)))
                 return false;
             lua_pushnumber(L, fValue);
             break; }
         case LUA_TFUNCTION: {
             if(!lua_checkstack(L, 8))
                 return false;
             uint64_t iOldIndex = m_iNextIndex;
             ++m_iNextIndex; // Temporary marker
             // Read a callable and invoke it with no arguments; it must
             // return two values. The top one is saved as the closure, the
             // one below it is later called with the upvalue values.
             // (Presumably the callable is a factory built by the
             // PERSIST_TPROTOTYPE case - confirm against the writer.)
             if(!readStackObject())
                 return false;
             lua_call(L, 0, 2);
             // Replace marker with closure
             uint64_t iNewIndex = m_iNextIndex;
             m_iNextIndex = iOldIndex;
             saveStackObject();
             m_iNextIndex = iNewIndex;
             // Set upvalues
             // (move the closure below the other returned function so that
             // function can be called with the upvalue values as arguments)
             lua_insert(L, -2);
             int iNups, i;
             if(!readVUInt(iNups))
                 return false;
             size_t iIDSize;
             if(!readVUInt(iIDSize))
                 return false;
             for(i = 0; i < iNups; ++i)
             {
                 if(!readStackObject())
                     return false;
                 // For now, just skip over the upvalue IDs. In the future,
                 // the ID may be used to rejoin shared upvalues.
                 if(!readByteStream(NULL, iIDSize))
                     return false;
             }
             lua_call(L, iNups, 0);
             // Read environment
             if(!readStackObject())
                     return false;
             lua_setfenv(L, -2);
             break; }
         case PERSIST_TPROTOTYPE: {
             if(!lua_checkstack(L, 8))
                 return false;
             uint64_t iOldIndex = m_iNextIndex;
             ++m_iNextIndex; // Temporary marker
             int iNups;
             if(!readVUInt(iNups))
                 return false;
             // Build a source prefix to prepend to the prototype's code. With
             // upvalue names a,b it is
             //   "local a,b;return function(...)a,b=...end,"
             // and with no upvalues "return function() end,".
             if(iNups == 0)
                 lua_pushliteral(L, "return function() end,");
             else
             {
                 lua_pushliteral(L, "local ");
                 // NOTE(review): the result of this lua_checkstack is ignored.
                 lua_checkstack(L, (iNups + 1) * 2);
                 for(int i = 0; i < iNups; ++i)
                 {
                     if(i != 0)
                         lua_pushliteral(L, ",");
                     if(!readStackObject())
                         return false;
                     if(lua_type(L, -1) != LUA_TSTRING)
                     {
                         setError("Upvalue name not a string");
                         return false;
                     }
                 }
                 // Join the names and separators into one "a,b,..." string.
                 lua_concat(L, iNups * 2 - 1);
                 lua_pushliteral(L, ";return function(...)");
                 lua_pushvalue(L, -2);
                 lua_pushliteral(L, "=...end,");
                 lua_concat(L, 5);
             }
             // Fetch name and then lookup filename and code
             if(!readStackObject())
                 return false;
             lua_pushliteral(L, "@");
             // Look the name up in the table stored at key -1 of the table
             // at index 1.
             lua_rawgeti(L, 1, -1);
             lua_pushvalue(L, -3);
             lua_gettable(L, -2);
             lua_replace(L, -2);
             if(lua_isnil(L, -1))
             {
                 setError(lua_pushfstring(L, "Unable to depersist prototype"
                     " \'%s\'", lua_tostring(L, -3)));
                 return false;
             }
             lua_concat(L, 2); // Prepend the @ to the filename
             // Look the name up in the table stored at key -2 of the table
             // at index 1, then drop the name itself from the stack.
             lua_rawgeti(L, 1, -2);
             lua_pushvalue(L, -3);
             lua_gettable(L, -2);
             lua_replace(L, -2);
             lua_remove(L, -3);
             // Construct the closure factory
             // (two buffers are handed to the loader: the generated prefix
             // and the looked-up code; the "@filename" string is the chunk
             // name)
             LoadMultiBuffer_t ls;
             ls.s[0] = lua_tolstring(L, -3, &ls.i[0]);
             ls.s[1] = lua_tolstring(L, -1, &ls.i[1]);
             if(lua_load(L, LoadMultiBuffer_t::load_fn, &ls, lua_tostring(L, -2)) != 0)
             {
                 // Should never happen
                 // (per the Lua manual lua_error does not return, so the
                 // return below is not expected to execute)
                 lua_error(L);
                 return false;
             }
             // Keep only the loaded function: overwrite the prefix string
             // with it and pop the two remaining strings.
             lua_replace(L, -4);
             lua_pop(L, 2);
             // Replace marker with closure factory
             uint64_t iNewIndex = m_iNextIndex;
             m_iNextIndex = iOldIndex;
             saveStackObject();
             m_iNextIndex = iNewIndex;
             break; }
         case LUA_TUSERDATA: {
             bool bHasSetMetatable = false;
             uint64_t iOldIndex = m_iNextIndex;
             ++m_iNextIndex; // Temporary marker
             // Read metatable
             if(!readStackObject())
                 return false;
             // The metatable must supply the allocation size as a number.
             lua_getfield(L, -1, "__depersist_size");
             if(!lua_isnumber(L, -1))
             {
                 setError("Userdata missing __depersist_size metafield");
                 return false;
             }
             // Allocate the userdata and let it take the stack slot of the
             // size value, leaving: metatable, userdata.
             lua_newuserdata(L, (size_t)lua_tonumber(L, -1));
             lua_replace(L, -2);
             // Replace marker with userdata
             uint64_t iNewIndex = m_iNextIndex;
             m_iNextIndex = iOldIndex;
             saveStackObject();
             m_iNextIndex = iNewIndex;
             // Perform immediate initialisation
             lua_getfield(L, -2, "__pre_depersist");
             if(lua_isnil(L, -1))
                 lua_pop(L, 1);
             else
             {
                 // Set metatable now, as pre-depersister may expect it
                 // NB: Setting metatable if there isn't a pre-depersister
                 // is not a good idea, as if there is an error while the
                 // environment table is being de-persisted, then the __gc
                 // handler of the userdata will eventually be called with
                 // the userdata's contents still being uninitialised.
                 lua_pushvalue(L, -3);
                 lua_setmetatable(L, -3);
                 bHasSetMetatable = true;
                 // Call __pre_depersist(userdata), discarding any results.
                 lua_pushvalue(L, -2);
                 lua_call(L, 1, 0);
             }
             // Read environment
             if(!readStackObject())
                 return false;
             lua_setfenv(L, -2);
             // Set metatable and read the raw data
             if(!bHasSetMetatable)
             {
                 lua_pushvalue(L, -2);
                 lua_setmetatable(L, -2);
             }
             lua_getfield(L, -2, "__depersist");
             if(lua_isnil(L, -1))
                 lua_pop(L, 1);
             else
             {
                 // Call __depersist(userdata, <value at key -3 of the table
                 // at index 1>) and inspect its single result.
                 lua_pushvalue(L, -2);
                 lua_rawgeti(L, 1, -3);
                 lua_call(L, 2, 1);
                 if(lua_toboolean(L, -1) != 0)
                 {
                     // Truthy result: append the userdata to the array part
                     // of the metatable of the value stored at key -3.
                     lua_pop(L, 1);
                     lua_rawgeti(L, 1, -3);
                     lua_getmetatable(L, -1);
                     lua_replace(L, -2);
                     lua_pushvalue(L, -2);
                     lua_rawseti(L, -2, (int)lua_objlen(L, -2) + 1);
                 }
                 lua_pop(L, 1);
             }
             // Drop the metatable, leaving only the userdata on the stack.
             lua_replace(L, -2);
             // The userdata payload must be followed by the marker 0x42.
             uint64_t iSyncMarker;
             if(!readVUInt(iSyncMarker))
                 return false;
             if(iSyncMarker != 0x42)
             {
                 setError("sync fail");
                 return false;
             }
             break; }
         case PERSIST_TINTEGER: {
             // Payload: an unsigned integer read into 16 bits.
             uint16_t iValue;
             if(!readVUInt(iValue))
                 return false;
             lua_pushinteger(L, iValue);
             break; }
         default:
             // Unsupported tag: build "Unable to depersist values of type
             // '<name>'" on the Lua stack and report it through setError().
             lua_pushliteral(L, "Unable to depersist values of type \'");
             if(iType <= LUA_TTHREAD)
                 lua_pushstring(L, lua_typename(L, iType));
             else
             {
                 // NOTE(review): if no case below matches, nothing is pushed
                 // and the lua_concat(L, 3) further down would consume an
                 // unrelated stack value - confirm every tag is covered.
                 switch(iType)
                 {
                 case PERSIST_TPERMANENT:
                     lua_pushliteral(L, "permanent"); break;
                 case PERSIST_TTRUE:
                     lua_pushliteral(L, "boolean-true"); break;
                 case PERSIST_TTABLE_WITH_META:
                     lua_pushliteral(L, "table-with-metatable"); break;
                 case PERSIST_TINTEGER:
                     lua_pushliteral(L, "integer"); break;
                 case PERSIST_TPROTOTYPE:
                     lua_pushliteral(L, "prototype"); break;
                 case PERSIST_TRESERVED1:
                     lua_pushliteral(L, "reserved1"); break;
                 case PERSIST_TRESERVED2:
                     lua_pushliteral(L, "reserved2"); break;
                 }
             }
             lua_pushliteral(L, "\'");
             lua_concat(L, 3);
             setError(lua_tostring(L, -1));
             lua_pop(L, 1);
             return false;
         }
     }
     return true;
 }
Beispiel #4
0
// Converts this partition from file mode to split mode.
//
// bucketCount child JoinPartitions are created, the small-side data is
// re-read from stream 0 and every row is handed to a child bucket chosen by
// hashing its join key (typeless key hash, or the first key column widened
// to 64 bits).  When antiWithMatchNulls is set, a row with a NULL join column
// is instead copied into *every* bucket; unless needsAllNullRows is set only
// the first such row is kept.  Afterwards the original small-side file is
// removed and smallRG is re-pointed at the member `buffer` and reset.
//
// Returns the accumulated results of insertSmallSideRow() minus the previous
// smallSizeOnDisk (presumably the net change in bytes on disk - confirm
// against insertSmallSideRow).
//
// Throws IDBExcept(ERR_DBJ_DATA_DISTRIBUTION) when one bucket received every
// hashed row, i.e. splitting did not spread the data at all.
int64_t JoinPartition::convertToSplitMode()
{
	int i, j;
	ByteStream bs;
	RGData rgData;
	uint32_t hash;
	uint64_t tmp;
	int64_t ret = -(int64_t)smallSizeOnDisk;    // smallFile gets deleted
	boost::scoped_array<uint32_t> rowDist(new uint32_t[bucketCount]);
	uint32_t rowCount = 0;

	memset(rowDist.get(), 0, sizeof(uint32_t) * bucketCount);
	fileMode = false;
	htSizeEstimate = 0;
	smallSizeOnDisk = 0;
	buckets.reserve(bucketCount);
	for (i = 0; i < (int) bucketCount; i++)
		buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));

	RowGroup &rg = smallRG;
	Row &row = smallRow;
	nextSmallOffset = 0;
	while (1) {
		// An empty ByteStream marks the end of the small-side data.
		readByteStream(0, &bs);
		if (bs.length() == 0)
			break;
		rgData.deserialize(bs);
		rg.setData(&rgData);
		for (j = 0; j < (int) rg.getRowCount(); j++) {
			rg.getRow(j, &row);

			if (antiWithMatchNulls && hasNullJoinColumn(row)) {
				if (needsAllNullRows || !gotNullRow) {
					// Broadcast the NULL-key row to every bucket.  This must
					// use its own index: reusing the row index j here would
					// leave j == bucketCount and make the row loop skip or
					// re-process rows after the `continue` below.
					for (int b = 0; b < (int) bucketCount; b++)
						ret += buckets[b]->insertSmallSideRow(row);
					gotNullRow = true;
				}
				continue;
			}

			if (typelessJoin)
				hash = getHashOfTypelessKey(row, smallKeyCols, hashSeed) % bucketCount;
			else {
				// Widen the first key column to 64 bits before hashing.
				if (UNLIKELY(row.isUnsigned(smallKeyCols[0])))
					tmp = row.getUintField(smallKeyCols[0]);
				else
					tmp = row.getIntField(smallKeyCols[0]);
				hash = hasher((char *) &tmp, 8, hashSeed);
				hash = hasher.finalize(hash, 8) % bucketCount;
			}
			// Only hashed rows are counted; broadcast NULL-key rows are not.
			rowCount++;
			rowDist[hash]++;
			ret += buckets[hash]->insertSmallSideRow(row);
		}
	}
	boost::filesystem::remove(smallFilename);
	smallFilename.clear();

	// NOTE(review): with rowCount == 0 every bucket matches and this throws
	// as well - confirm that is the intended behaviour for an empty input.
	for (i = 0; i < (int) bucketCount; i++)
		if (rowDist[i] == rowCount)
			throw IDBExcept("All rows hashed to the same bucket", ERR_DBJ_DATA_DISTRIBUTION);

	// Detach smallRG from the last chunk read above (rgData is a local).
	rg.setData(&buffer);
	rg.resetRowGroup(0);
	rg.getRow(0, &row);

	return ret;
}