/**
 * Process one buffer of large-side rows for this partition.
 *
 * The buffer is attached to largeRG.  In file mode the whole row group is
 * serialized and passed to writeByteStream(); otherwise each row is hashed on
 * its join key and handed to the selected entry of buckets.
 *
 * @param rgData  row data to attach to largeRG and process
 * @return the value returned by writeByteStream() in file mode, otherwise the
 *         sum of the values returned by insertLargeSideRow(); the same amount
 *         is also added to largeSizeOnDisk
 * @throws QueryDataExcept with ERR_DBJ_ANTI_NULL when antiWithMatchNulls and
 *         needsAllNullRows are both set and any row has a NULL key column
 */
int64_t JoinPartition::processLargeBuffer(RGData &rgData)
{
    RowGroup &rg = largeRG;
    Row &row = largeRow;
    int64_t ret = 0;

    rg.setData(&rgData);

    /* A query with an anti join, an FE filter, and a NULL row on the large
       side has to fail, because that row would need to be joined with the
       entire small side table. */
    if (antiWithMatchNulls && needsAllNullRows)
    {
        rg.getRow(0, &row);

        for (int rowIdx = 0; rowIdx < (int) rg.getRowCount(); rowIdx++, row.nextRow())
        {
            for (int keyIdx = 0; keyIdx < (int) largeKeyCols.size(); keyIdx++)
            {
                if (row.isNullValue(largeKeyCols[keyIdx]))
                    throw QueryDataExcept("", ERR_DBJ_ANTI_NULL);
            }
        }
    }

    if (!fileMode)
    {
        // In-memory mode: route every row to a bucket chosen by its key hash.
        uint64_t hash, tmp;

        for (int rowIdx = 0; rowIdx < (int) rg.getRowCount(); rowIdx++)
        {
            rg.getRow(rowIdx, &row);

            if (!typelessJoin)
            {
                // Single-column key: hash its 8-byte integer representation.
                if (UNLIKELY(row.isUnsigned(largeKeyCols[0])))
                    tmp = row.getUintField(largeKeyCols[0]);
                else
                    tmp = row.getIntField(largeKeyCols[0]);

                hash = hasher((char *) &tmp, 8, hashSeed);
                hash = hasher.finalize(hash, 8) % bucketCount;
            }
            else
            {
                hash = getHashOfTypelessKey(row, largeKeyCols, hashSeed) % bucketCount;
            }

            ret += buckets[hash]->insertLargeSideRow(row);
        }
    }
    else
    {
        // File mode: serialize the row group and write it out in one piece.
        // The first argument is 1 here; processSmallBuffer() passes 0.
        ByteStream bs;

        rg.serializeRGData(bs);
        ret = writeByteStream(1, bs);
    }

    largeSizeOnDisk += ret;
    return ret;
}
/**
 * Write the prototype of a Lua function (the part shared by every closure
 * created from the same function definition).
 *
 * A prototype is identified by "<source file>:<line defined>".  The first
 * time one is seen it is given the next persist index, recorded in the
 * metatable of the value at stack index 1, and written out in full: type
 * byte, upvalue count, upvalue names, then the function's persist name.
 * Later occurrences are written as a reference to the recorded index.
 *
 * The Lua stack is left exactly as it was found on every exit path.
 *
 * @param pProtoInfo      debug info for the function; the source, what,
 *                        linedefined and nups fields are read
 * @param iInstanceIndex  stack index of a closure of this prototype, used to
 *                        read upvalue names
 *
 * Failures are reported through setError(); nothing is returned.
 */
void writePrototype(lua_Debug *pProtoInfo, int iInstanceIndex)
{
    lua_State *L = m_L;

    // Sanity checks
    if(pProtoInfo->source[0] != '@')
    {
        // @ denotes that the source was a file
        // (http://www.lua.org/manual/5.1/manual.html#lua_Debug)
        setError("Can only persist Lua functions defined in source files");
        return;
    }
    if(strcmp(pProtoInfo->what, "Lua") != 0)
    {
        // what == "C" should have been caught by writeObjectRaw().
        // what == "tail" should be impossible.
        // Hence "Lua" and "main" should be the only values seen.
        // NB: Chunks are not functions defined *in* source files, because
        // chunks *are* source files.
        setError(lua_pushfstring(L, "Cannot persist entire Lua chunks (%s)", pProtoInfo->source + 1));
        lua_pop(L, 1);
        return;
    }

    // Attempt cached lookup (prototypes are not publicly visible Lua objects,
    // and hence cannot be cached in the normal way of self's environment).
    lua_getmetatable(L, 1);                                // ..., mt
    lua_pushfstring(L, "%s:%d", pProtoInfo->source + 1,
        pProtoInfo->linedefined);                          // ..., mt, key
    lua_pushvalue(L, -1);                                  // ..., mt, key, key
    lua_rawget(L, -3);                                     // ..., mt, key, mt[key]
    if(!lua_isnil(L, -1))
    {
        // Already written once: emit a back-reference only.
        uint64_t iValue = (uint64_t)lua_tonumber(L, -1);
        lua_pop(L, 3);
        writeVUInt(iValue + PERSIST_TCOUNT - 1);
        return;
    }
    lua_pop(L, 1);                                         // ..., mt, key

    // First sighting: record mt[key] = next persist index.
    lua_pushvalue(L, -1);
    lua_pushnumber(L, (lua_Number)m_iNextIndex++);
    lua_rawset(L, -4);                                     // ..., mt, key

    uint8_t iType = PERSIST_TPROTOTYPE;
    writeByteStream(&iType, 1);

    // Write upvalue names
    writeVUInt(pProtoInfo->nups);
    for(int i = 1; i <= pProtoInfo->nups; ++i)
    {
        // lua_getupvalue pushes the upvalue's value and returns its name;
        // only the name is written here, then both are discarded.
        lua_pushstring(L, lua_getupvalue(L, iInstanceIndex, i));
        writeStackObject(-1);
        lua_pop(L, 2);
    }

    // Write the function's persist name
    lua_rawgeti(L, -2, 1);                                 // ..., mt, key, mt[1]
    lua_replace(L, -3);                                    // ..., mt[1], key
    lua_rawget(L, -2);                                     // ..., mt[1], mt[1][key]
    if(lua_isnil(L, -1))
    {
        setError(lua_pushfstring(L, "Lua functions must be given a unique "
            "persistable name in order to be persisted (attempt to persist"
            " %s:%d)", pProtoInfo->source + 1, pProtoInfo->linedefined));
        // Three values are owned here (mt[1], the nil result, the message);
        // pop all of them so the error path is as balanced as the others.
        lua_pop(L, 3);
        return;
    }
    writeStackObject(-1);
    lua_pop(L, 2);
}
int writeObjectRaw() { lua_State *L = m_L; uint8_t iType; // Save the index to the cache lua_pushvalue(L, 2); lua_pushnumber(L, (lua_Number)(m_iNextIndex++)); lua_settable(L, 1); // Lookup the object in the permanents table lua_pushvalue(L, 2); lua_gettable(L, luaT_upvalueindex(1)); if(lua_type(L, -1) != LUA_TNIL) { // Object is in the permanents table. uint8_t iType = PERSIST_TPERMANENT; writeByteStream(&iType, 1); // Replace self's environment with self (for call to writeStackObject) lua_pushvalue(L, luaT_upvalueindex(2)); lua_replace(L, 1); // Write the key corresponding to the permanent object writeStackObject(-1); } else { // Object is not in the permanents table. lua_pop(L, 1); switch(lua_type(L, 2)) { // LUA_TNIL handled in writeStackObject // LUA_TBOOLEAN handled in writeStackObject // LUA_TNUMBER handled in writeStackObject case LUA_TSTRING: { iType = LUA_TSTRING; writeByteStream(&iType, 1); // Strings are simple: length and then bytes (not null terminated) size_t iLength; const char *sString = lua_tolstring(L, 2, &iLength); writeVUInt(iLength); writeByteStream(reinterpret_cast<const uint8_t*>(sString), iLength); break; } case LUA_TTABLE: { // Replace self's environment with self (for calls to writeStackObject) lua_pushvalue(L, luaT_upvalueindex(2)); lua_replace(L, 1); // Save env and insert prior to table lua_getfenv(L, 1); lua_insert(L, 2); int iTable = 3; table_reentry: // Handle the metatable if(lua_getmetatable(L, iTable)) { iType = PERSIST_TTABLE_WITH_META; writeByteStream(&iType, 1); writeStackObject(-1); lua_pop(L, 1); } else { iType = LUA_TTABLE; writeByteStream(&iType, 1); } // Write the children as key, value pairs lua_pushnil(L); while(lua_next(L, iTable)) { writeStackObject(-2); // The naive thing to do now would be writeStackObject(-1) // but this can easily lead to Lua's C call stack limit // being hit. To reduce the likelyhood of this happening, // we check to see if about to write another table. 
if(lua_type(L, -1) == LUA_TTABLE) { lua_pushvalue(L, -1); lua_rawget(L, 2); lua_pushvalue(L, -2); lua_gettable(L, luaT_upvalueindex(1)); if(lua_isnil(L, -1) && lua_isnil(L, -2)) { lua_pop(L, 2); lua_checkstack(L, 10); iTable += 2; lua_pushvalue(L, iTable); lua_pushnumber(L, (lua_Number)(m_iNextIndex++)); lua_settable(L, 2); goto table_reentry; table_resume: iTable -= 2; } else { lua_pop(L, 2); writeStackObject(-1); } } else writeStackObject(-1); lua_pop(L, 1); } // Write a nil to mark the end of the children (as nil is the // only value which cannot be used as a key in a table). iType = LUA_TNIL; writeByteStream(&iType, 1); if(iTable != 3) goto table_resume; break; } case LUA_TFUNCTION: if(lua_iscfunction(L, 2)) { setErrorObject(2); setError("Cannot persist C functions"); } else { iType = LUA_TFUNCTION; writeByteStream(&iType, 1); // Replace self's environment with self (for calls to writeStackObject) lua_pushvalue(L, luaT_upvalueindex(2)); lua_replace(L, 1); // Write the prototype (the part of a function which is common across // multiple closures - see LClosure / Proto in Lua's lobject.h). lua_Debug proto_info; lua_pushvalue(L, 2); lua_getinfo(L, ">Su", &proto_info); writePrototype(&proto_info, 2); // Write the values of the upvalues // If available, also write the upvalue IDs (so that in // the future, we could hypothetically rejoin shared // upvalues). An ID is just an opaque sequence of bytes. 
writeVUInt(proto_info.nups); #if LUA_VERSION_NUM >= 502 writeVUInt(sizeof(void*)); #else writeVUInt(0); #endif for(int i = 1; i <= proto_info.nups; ++i) { lua_getupvalue(L, 2, i); writeStackObject(-1); #if LUA_VERSION_NUM >= 502 void *pUpvalueID = lua_upvalueid(L, 2, i); writeByteStream((uint8_t*)&pUpvalueID, sizeof(void*)); #endif } // Write the environment table lua_getfenv(L, 2); writeStackObject(-1); lua_pop(L, 1); } break; case LUA_TUSERDATA: if(!_checkThatUserdataCanBeDepersisted(2)) break; // Replace self's environment with self (for calls to writeStackObject) lua_pushvalue(L, luaT_upvalueindex(2)); lua_replace(L, 1); // Write type, metatable, and then environment iType = LUA_TUSERDATA; writeByteStream(&iType, 1); writeStackObject(-1); lua_getfenv(L, 2); writeStackObject(-1); lua_pop(L, 1); // Write the raw data if(lua_type(L, -1) == LUA_TTABLE) { lua_getfield(L, -1, "__persist"); if(lua_isnil(L, -1)) lua_pop(L, 1); else { lua_pushvalue(L, 2); lua_pushvalue(L, luaT_upvalueindex(2)); lua_call(L, 2, 0); } } writeVUInt((uint64_t)0x42); // sync marker break; default: setError(lua_pushfstring(L, "Cannot persist %s values", luaL_typename(L, 2))); break; } } lua_pushnumber(L, 0); return 1; }
virtual void writeStackObject(int iIndex) { lua_State *L = m_L; // Convert index from relative to absolute if(iIndex < 0 && iIndex > LUA_REGISTRYINDEX) iIndex = lua_gettop(L) + 1 + iIndex; // Basic types always have their value written int iType = lua_type(L, iIndex); if(iType == LUA_TNIL || iType == LUA_TNONE) { uint8_t iByte = LUA_TNIL; writeByteStream(&iByte, 1); } else if(iType == LUA_TBOOLEAN) { uint8_t iByte; if(lua_toboolean(L, iIndex)) iByte = PERSIST_TTRUE; else iByte = LUA_TBOOLEAN; writeByteStream(&iByte, 1); } else if(iType == LUA_TNUMBER) { double fValue = lua_tonumber(L, iIndex); if(floor(fValue) == fValue && 0.0 <= fValue && fValue <= 16383.0) { // Small integers are written as just a few bytes // NB: 16383 = 2^14-1, which is the maximum value which // can fit into two bytes of VUInt. uint8_t iByte = PERSIST_TINTEGER; writeByteStream(&iByte, 1); uint16_t iValue = (uint16_t)fValue; writeVUInt(iValue); } else { // Other numbers are written as an 8 byte double uint8_t iByte = LUA_TNUMBER; writeByteStream(&iByte, 1); writeByteStream(reinterpret_cast<uint8_t*>(&fValue), sizeof(double)); } } else { // Complex values are cached, and are only written once (if this weren't // done, then cycles in the object graph would break things). lua_getfenv(L, 1); lua_pushvalue(L, iIndex); lua_gettable(L, -2); // Might (indirectly) call writeObjectRaw uint64_t iValue = (uint64_t)lua_tonumber(L, -1); lua_pop(L, 2); if(iValue != 0) { // If the value has not previously been written, then writeObjectRaw // would have been called, and the appropriate data written, and 0 // would be returned. Otherwise, the index would be returned, which // we offset by the number of types, and then write. writeVUInt(iValue + PERSIST_TCOUNT - 1); } } }
virtual void fastWriteStackObject(int iIndex) { lua_State *L = m_L; if(lua_type(L, iIndex) != LUA_TUSERDATA) { writeStackObject(iIndex); return; } // Convert index from relative to absolute if(iIndex < 0 && iIndex > LUA_REGISTRYINDEX) iIndex = lua_gettop(L) + 1 + iIndex; // Check for no cycle lua_getfenv(L, 1); lua_pushvalue(L, iIndex); lua_rawget(L, -2); lua_rawgeti(L, -2, 1); lua_pushvalue(L, iIndex); lua_gettable(L, -2); lua_replace(L, -2); if(!lua_isnil(L, -1) || !lua_isnil(L, -2)) { lua_pop(L, 3); writeStackObject(iIndex); return; } lua_pop(L, 2); // Save the index to the cache lua_pushvalue(L, iIndex); lua_pushnumber(L, (lua_Number)(m_iNextIndex++)); lua_settable(L, -3); if(!_checkThatUserdataCanBeDepersisted(iIndex)) return; // Write type, metatable, and then environment uint8_t iType = LUA_TUSERDATA; writeByteStream(&iType, 1); writeStackObject(-1); lua_getfenv(L, iIndex); writeStackObject(-1); lua_pop(L, 1); // Write the raw data if(lua_type(L, -1) == LUA_TTABLE) { lua_getfield(L, -1, "__persist"); if(lua_isnil(L, -1)) lua_pop(L, 1); else { lua_pushvalue(L, iIndex); lua_checkstack(L, 20); lua_CFunction fn = lua_tocfunction(L, -2); fn(L); lua_pop(L, 2); } } writeVUInt((uint64_t)0x42); // sync marker lua_pop(L, 1); }
/**
 * Process one buffer of small-side rows for this partition.
 *
 * The buffer is attached to smallRG.  In file mode the row group is
 * serialized and passed to writeByteStream(), the hash-table size estimate
 * is updated, and convertToSplitMode() is called once the estimate exceeds
 * htTargetSize.  Otherwise each row is hashed on its join key and handed to
 * the selected entry of buckets; rows with a NULL join column get special
 * treatment when antiWithMatchNulls is set.
 *
 * @param rgData  row data to attach to smallRG and process
 * @return the accumulated values returned by writeByteStream(),
 *         convertToSplitMode() and insertSmallSideRow(); the same amount is
 *         also added to smallSizeOnDisk
 */
int64_t JoinPartition::processSmallBuffer(RGData &rgData)
{
    RowGroup &rg = smallRG;
    Row &row = smallRow;
    int64_t ret = 0;

    rg.setData(&rgData);

    if (!fileMode)
    {
        // In-memory mode: distribute the rows among the buckets.
        uint64_t hash, tmp;

        for (int rowIdx = 0; rowIdx < (int) rg.getRowCount(); rowIdx++)
        {
            rg.getRow(rowIdx, &row);

            if (antiWithMatchNulls && hasNullJoinColumn(row))
            {
                // A row with a NULL join column is copied into every bucket.
                // Unless needsAllNullRows is set, only the first such row is
                // kept; the rest are dropped.
                if (needsAllNullRows || !gotNullRow)
                {
                    for (int bucketIdx = 0; bucketIdx < (int) bucketCount; bucketIdx++)
                        ret += buckets[bucketIdx]->insertSmallSideRow(row);

                    gotNullRow = true;
                }

                continue;
            }

            if (!typelessJoin)
            {
                // Single-column key: hash its 8-byte integer representation.
                if (UNLIKELY(row.isUnsigned(smallKeyCols[0])))
                    tmp = row.getUintField(smallKeyCols[0]);
                else
                    tmp = row.getIntField(smallKeyCols[0]);

                hash = hasher((char *) &tmp, 8, hashSeed);
                hash = hasher.finalize(hash, 8) % bucketCount;
            }
            else
            {
                hash = getHashOfTypelessKey(row, smallKeyCols, hashSeed) % bucketCount;
            }

            ret += buckets[hash]->insertSmallSideRow(row);
        }
    }
    else
    {
        // File mode: serialize the row group and write it out in one piece.
        // The first argument is 0 here; processLargeBuffer() passes 1.
        ByteStream bs;

        rg.serializeRGData(bs);
        ret = writeByteStream(0, bs);

        /* Check whether this partition is now too big -> convert to split mode.

           The current estimate is based on 100M 4-byte rows = 4GB.  The total
           size is the amount stored in RowGroups in mem + the size of the
           hash table.  The RowGroups in that case use 600MB, so 3.4GB is used
           by the hash table.  3.4GB/100M rows = 34 bytes/row. */
        htSizeEstimate += rg.getDataSize() + (34 * rg.getRowCount());

        if (htSizeEstimate > htTargetSize)
            ret += convertToSplitMode();
    }

    smallSizeOnDisk += ret;
    return ret;
}