bool JoinPartition::getNextPartition(vector<RGData> *smallData, uint64_t *partitionID, JoinPartition **jp) { if (fileMode) { ByteStream bs; RGData rgData; if (nextPartitionToReturn > 0) return false; //cout << "reading the small side" << endl; nextSmallOffset = 0; while (1) { readByteStream(0, &bs); if (bs.length() == 0) break; rgData.deserialize(bs); //smallRG.setData(&rgData); //cout << "read a smallRG with " << smallRG.getRowCount() << " rows" << endl; smallData->push_back(rgData); } nextPartitionToReturn = 1; *partitionID = uniqueID; *jp = this; return true; } bool ret = false; while (!ret && nextPartitionToReturn < bucketCount) { ret = buckets[nextPartitionToReturn]->getNextPartition(smallData, partitionID, jp); if (!ret) nextPartitionToReturn++; } return ret; }
// Reads the next chunk of large-side data via readByteStream(1, ...).
//
// Returns a newly allocated RGData deserialized from that chunk. When the
// read yields an empty ByteStream, the large-side file is removed,
// largeSizeOnDisk is reset to 0 and an empty (null) shared_ptr is returned.
boost::shared_ptr<RGData> JoinPartition::getNextLargeRGData()
{
    ByteStream chunk;
    readByteStream(1, &chunk);

    if (chunk.length() == 0)
    {
        // Nothing left to read: drop the file and its size accounting.
        boost::filesystem::remove(largeFilename);
        largeSizeOnDisk = 0;
        return boost::shared_ptr<RGData>();
    }

    boost::shared_ptr<RGData> rowData(new RGData());
    rowData->deserialize(chunk);
    return rowData;
}
virtual bool readStackObject() { uint64_t iIndex; if(!readVUInt(iIndex)) { setError("Expected stack object"); return false; } lua_State *L = m_L; if(lua_type(L, 1) != LUA_TTABLE) { // Ensure that index #1 is self environment lua_getfenv(L, 1); lua_replace(L, 1); } if(iIndex >= PERSIST_TCOUNT) { iIndex += 1 - PERSIST_TCOUNT; if(iIndex < (uint64_t)INT_MAX) lua_rawgeti(L, 1, (int)iIndex); else { lua_pushnumber(L, (lua_Number)iIndex); lua_rawget(L, 1); } if(lua_isnil(L, -1)) { setError("Cycle while depersisting permanent object key or userdata metatable"); return false; } } else { uint8_t iType = (uint8_t)iIndex; switch(iType) { case LUA_TNIL: lua_pushnil(L); break; case PERSIST_TPERMANENT: { uint64_t iOldIndex = m_iNextIndex; ++m_iNextIndex; // Temporary marker lua_rawgeti(L, 1, 0); // Permanents table if(!readStackObject()) return false; lua_gettable(L, -2); lua_replace(L, -2); // Replace marker with actual object uint64_t iNewIndex = m_iNextIndex; m_iNextIndex = iOldIndex; saveStackObject(); m_iNextIndex = iNewIndex; break; } case LUA_TBOOLEAN: lua_pushboolean(L, 0); break; case PERSIST_TTRUE: lua_pushboolean(L, 1); break; case LUA_TSTRING: { size_t iLength; if(!readVUInt(iLength)) return false; while(iLength > m_iStringBufferLength) { m_iStringBufferLength *= 2; m_sStringBuffer = (char*)realloc(m_sStringBuffer, m_iStringBufferLength); } if(!readByteStream((uint8_t*)m_sStringBuffer, iLength)) return false; lua_pushlstring(L, m_sStringBuffer, iLength); saveStackObject(); break; } case LUA_TTABLE: lua_newtable(L); saveStackObject(); if(!lua_checkstack(L, 8)) return false; if(!readTableContents()) return false; break; case PERSIST_TTABLE_WITH_META: lua_newtable(L); saveStackObject(); if(!lua_checkstack(L, 8)) return false; if(!readStackObject()) return false; lua_setmetatable(L, -2); if(!readTableContents()) return false; break; case LUA_TNUMBER: { double fValue; if(!readByteStream(reinterpret_cast<uint8_t*>(&fValue), sizeof(double))) return false; lua_pushnumber(L, 
fValue); break; } case LUA_TFUNCTION: { if(!lua_checkstack(L, 8)) return false; uint64_t iOldIndex = m_iNextIndex; ++m_iNextIndex; // Temporary marker if(!readStackObject()) return false; lua_call(L, 0, 2); // Replace marker with closure uint64_t iNewIndex = m_iNextIndex; m_iNextIndex = iOldIndex; saveStackObject(); m_iNextIndex = iNewIndex; // Set upvalues lua_insert(L, -2); int iNups, i; if(!readVUInt(iNups)) return false; size_t iIDSize; if(!readVUInt(iIDSize)) return false; for(i = 0; i < iNups; ++i) { if(!readStackObject()) return false; // For now, just skip over the upvalue IDs. In the future, // the ID may be used to rejoin shared upvalues. if(!readByteStream(NULL, iIDSize)) return false; } lua_call(L, iNups, 0); // Read environment if(!readStackObject()) return false; lua_setfenv(L, -2); break; } case PERSIST_TPROTOTYPE: { if(!lua_checkstack(L, 8)) return false; uint64_t iOldIndex = m_iNextIndex; ++m_iNextIndex; // Temporary marker int iNups; if(!readVUInt(iNups)) return false; if(iNups == 0) lua_pushliteral(L, "return function() end,"); else { lua_pushliteral(L, "local "); lua_checkstack(L, (iNups + 1) * 2); for(int i = 0; i < iNups; ++i) { if(i != 0) lua_pushliteral(L, ","); if(!readStackObject()) return false; if(lua_type(L, -1) != LUA_TSTRING) { setError("Upvalue name not a string"); return false; } } lua_concat(L, iNups * 2 - 1); lua_pushliteral(L, ";return function(...)"); lua_pushvalue(L, -2); lua_pushliteral(L, "=...end,"); lua_concat(L, 5); } // Fetch name and then lookup filename and code if(!readStackObject()) return false; lua_pushliteral(L, "@"); lua_rawgeti(L, 1, -1); lua_pushvalue(L, -3); lua_gettable(L, -2); lua_replace(L, -2); if(lua_isnil(L, -1)) { setError(lua_pushfstring(L, "Unable to depersist prototype" " \'%s\'", lua_tostring(L, -3))); return false; } lua_concat(L, 2); // Prepend the @ to the filename lua_rawgeti(L, 1, -2); lua_pushvalue(L, -3); lua_gettable(L, -2); lua_replace(L, -2); lua_remove(L, -3); // Construct the closure 
factory LoadMultiBuffer_t ls; ls.s[0] = lua_tolstring(L, -3, &ls.i[0]); ls.s[1] = lua_tolstring(L, -1, &ls.i[1]); if(lua_load(L, LoadMultiBuffer_t::load_fn, &ls, lua_tostring(L, -2)) != 0) { // Should never happen lua_error(L); return false; } lua_replace(L, -4); lua_pop(L, 2); // Replace marker with closure factory uint64_t iNewIndex = m_iNextIndex; m_iNextIndex = iOldIndex; saveStackObject(); m_iNextIndex = iNewIndex; break; } case LUA_TUSERDATA: { bool bHasSetMetatable = false; uint64_t iOldIndex = m_iNextIndex; ++m_iNextIndex; // Temporary marker // Read metatable if(!readStackObject()) return false; lua_getfield(L, -1, "__depersist_size"); if(!lua_isnumber(L, -1)) { setError("Userdata missing __depersist_size metafield"); return false; } lua_newuserdata(L, (size_t)lua_tonumber(L, -1)); lua_replace(L, -2); // Replace marker with userdata uint64_t iNewIndex = m_iNextIndex; m_iNextIndex = iOldIndex; saveStackObject(); m_iNextIndex = iNewIndex; // Perform immediate initialisation lua_getfield(L, -2, "__pre_depersist"); if(lua_isnil(L, -1)) lua_pop(L, 1); else { // Set metatable now, as pre-depersister may expect it // NB: Setting metatable if there isn't a pre-depersister // is not a good idea, as if there is an error while the // environment table is being de-persisted, then the __gc // handler of the userdata will eventually be called with // the userdata's contents still being uninitialised. 
lua_pushvalue(L, -3); lua_setmetatable(L, -3); bHasSetMetatable = true; lua_pushvalue(L, -2); lua_call(L, 1, 0); } // Read environment if(!readStackObject()) return false; lua_setfenv(L, -2); // Set metatable and read the raw data if(!bHasSetMetatable) { lua_pushvalue(L, -2); lua_setmetatable(L, -2); } lua_getfield(L, -2, "__depersist"); if(lua_isnil(L, -1)) lua_pop(L, 1); else { lua_pushvalue(L, -2); lua_rawgeti(L, 1, -3); lua_call(L, 2, 1); if(lua_toboolean(L, -1) != 0) { lua_pop(L, 1); lua_rawgeti(L, 1, -3); lua_getmetatable(L, -1); lua_replace(L, -2); lua_pushvalue(L, -2); lua_rawseti(L, -2, (int)lua_objlen(L, -2) + 1); } lua_pop(L, 1); } lua_replace(L, -2); uint64_t iSyncMarker; if(!readVUInt(iSyncMarker)) return false; if(iSyncMarker != 0x42) { setError("sync fail"); return false; } break; } case PERSIST_TINTEGER: { uint16_t iValue; if(!readVUInt(iValue)) return false; lua_pushinteger(L, iValue); break; } default: lua_pushliteral(L, "Unable to depersist values of type \'"); if(iType <= LUA_TTHREAD) lua_pushstring(L, lua_typename(L, iType)); else { switch(iType) { case PERSIST_TPERMANENT: lua_pushliteral(L, "permanent"); break; case PERSIST_TTRUE: lua_pushliteral(L, "boolean-true"); break; case PERSIST_TTABLE_WITH_META: lua_pushliteral(L, "table-with-metatable"); break; case PERSIST_TINTEGER: lua_pushliteral(L, "integer"); break; case PERSIST_TPROTOTYPE: lua_pushliteral(L, "prototype"); break; case PERSIST_TRESERVED1: lua_pushliteral(L, "reserved1"); break; case PERSIST_TRESERVED2: lua_pushliteral(L, "reserved2"); break; } } lua_pushliteral(L, "\'"); lua_concat(L, 3); setError(lua_tostring(L, -1)); lua_pop(L, 1); return false; } } return true; }
int64_t JoinPartition::convertToSplitMode() { int i, j; ByteStream bs; RGData rgData; uint32_t hash; uint64_t tmp; int64_t ret = -(int64_t)smallSizeOnDisk; // smallFile gets deleted boost::scoped_array<uint32_t> rowDist(new uint32_t[bucketCount]); uint32_t rowCount = 0; memset(rowDist.get(), 0, sizeof(uint32_t) * bucketCount); fileMode = false; htSizeEstimate = 0; smallSizeOnDisk = 0; buckets.reserve(bucketCount); for (i = 0; i < (int) bucketCount; i++) buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false))); RowGroup &rg = smallRG; Row &row = smallRow; nextSmallOffset = 0; while (1) { readByteStream(0, &bs); if (bs.length() == 0) break; rgData.deserialize(bs); rg.setData(&rgData); for (j = 0; j < (int) rg.getRowCount(); j++) { rg.getRow(j, &row); if (antiWithMatchNulls && hasNullJoinColumn(row)) { if (needsAllNullRows || !gotNullRow) { for (j = 0; j < (int) bucketCount; j++) ret += buckets[j]->insertSmallSideRow(row); gotNullRow = true; } continue; } if (typelessJoin) hash = getHashOfTypelessKey(row, smallKeyCols, hashSeed) % bucketCount; else { if (UNLIKELY(row.isUnsigned(smallKeyCols[0]))) tmp = row.getUintField(smallKeyCols[0]); else tmp = row.getIntField(smallKeyCols[0]); hash = hasher((char *) &tmp, 8, hashSeed); hash = hasher.finalize(hash, 8) % bucketCount; } rowCount++; rowDist[hash]++; ret += buckets[hash]->insertSmallSideRow(row); } } boost::filesystem::remove(smallFilename); smallFilename.clear(); for (i = 0; i < (int) bucketCount; i++) if (rowDist[i] == rowCount) throw IDBExcept("All rows hashed to the same bucket", ERR_DBJ_DATA_DISTRIBUTION); rg.setData(&buffer); rg.resetRowGroup(0); rg.getRow(0, &row); return ret; }