Пример #1
0
/*
 * Consumes one large-side RGData.
 *
 * In file mode the row group is serialized and handed to writeByteStream(1, ...).
 * Otherwise every row is hashed on its join key and passed to the matching
 * child bucket via insertLargeSideRow().
 *
 * Returns the sum reported by writeByteStream() / insertLargeSideRow(); the
 * same amount is added to largeSizeOnDisk.
 *
 * Throws QueryDataExcept(ERR_DBJ_ANTI_NULL) when antiWithMatchNulls and
 * needsAllNullRows are both set and any large-side key column of any row is NULL.
 */
int64_t JoinPartition::processLargeBuffer(RGData &rgData)
{
	RowGroup &rowGroup = largeRG;
	Row &current = largeRow;
	int64_t total = 0;

	rowGroup.setData(&rgData);

	/* A query with an anti join, an FE filter, and a NULL row on the large
	side has to fail, because that row would need to be joined with the
	entire small side table. */
	if (antiWithMatchNulls && needsAllNullRows) {
		int rowIdx = 0;

		rowGroup.getRow(0, &current);
		while (rowIdx < (int) rowGroup.getRowCount()) {
			int keyIdx = 0;

			while (keyIdx < (int) largeKeyCols.size()) {
				if (current.isNullValue(largeKeyCols[keyIdx]))
					throw QueryDataExcept("", ERR_DBJ_ANTI_NULL);
				keyIdx++;
			}
			rowIdx++;
			current.nextRow();
		}
	}

	if (fileMode) {
		/* File mode: serialize the whole row group in one go. */
		ByteStream serialized;

		rowGroup.serializeRGData(serialized);
		total = writeByteStream(1, serialized);
	}
	else {
		/* Bucket mode: route each row to a bucket chosen by its key hash. */
		for (int rowIdx = 0; rowIdx < (int) rowGroup.getRowCount(); rowIdx++) {
			uint64_t bucketIdx;

			rowGroup.getRow(rowIdx, &current);
			if (!typelessJoin) {
				/* Single-column key: hash its 8-byte integer value. */
				uint64_t keyValue;

				if (UNLIKELY(current.isUnsigned(largeKeyCols[0])))
					keyValue = current.getUintField(largeKeyCols[0]);
				else
					keyValue = current.getIntField(largeKeyCols[0]);
				bucketIdx = hasher((char *) &keyValue, 8, hashSeed);
				bucketIdx = hasher.finalize(bucketIdx, 8) % bucketCount;
			}
			else
				bucketIdx = getHashOfTypelessKey(current, largeKeyCols, hashSeed) % bucketCount;

			total += buckets[bucketIdx]->insertLargeSideRow(current);
		}
	}

	largeSizeOnDisk += total;
	return total;
}
Пример #2
0
    /**
     * Write the prototype record of a Lua function to the output stream.
     *
     * The record consists of the PERSIST_TPROTOTYPE type byte, the number of
     * upvalues, each upvalue name, and the function's persist name. A
     * prototype which has already been written is instead emitted as a
     * back-reference (its cached index offset by PERSIST_TCOUNT - 1).
     *
     * The cache lives in the metatable of the value at stack index 1, keyed
     * by the string "<source>:<linedefined>". The persist name is looked up
     * with the same key in the table stored at field [1] of that metatable.
     *
     * On every return path the Lua stack is left as it was on entry.
     *
     * @param pProtoInfo Debug information for the function; the fields
     *        source, what, linedefined and nups are read.
     * @param iInstanceIndex Stack index of a closure of the prototype, used
     *        to fetch the upvalue names.
     */
    void writePrototype(lua_Debug *pProtoInfo, int iInstanceIndex)
    {
        lua_State *L = m_L;

        // Sanity checks
        if(pProtoInfo->source[0] != '@')
        {
            // @ denotes that the source was a file
            // (http://www.lua.org/manual/5.1/manual.html#lua_Debug)
            setError("Can only persist Lua functions defined in source files");
            return;
        }
        if(strcmp(pProtoInfo->what, "Lua") != 0)
        {
            // what == "C" should have been caught by writeObjectRaw().
            // what == "tail" should be impossible.
            // Hence "Lua" and "main" should be the only values seen.
            // NB: Chunks are not functions defined *in* source files, because
            // chunks *are* source files.
            setError(lua_pushfstring(L, "Cannot persist entire Lua chunks (%s)", pProtoInfo->source + 1));
            lua_pop(L, 1);
            return;
        }

        // Attempt cached lookup (prototypes are not publicly visible Lua objects,
        // and hence cannot be cached in the normal way of self's environment).
        lua_getmetatable(L, 1);                 // stack: mt
        lua_pushfstring(L, "%s:%d", pProtoInfo->source + 1, pProtoInfo->linedefined);
        lua_pushvalue(L, -1);                   // stack: mt, key, key
        lua_rawget(L, -3);                      // stack: mt, key, mt[key]
        if(!lua_isnil(L, -1))
        {
            // Already written: emit a back-reference to the cached index.
            uint64_t iValue = (uint64_t)lua_tonumber(L, -1);
            lua_pop(L, 3);
            writeVUInt(iValue + PERSIST_TCOUNT - 1);
            return;
        }
        // First sighting: record mt[key] = next index.
        lua_pop(L, 1);                          // stack: mt, key
        lua_pushvalue(L, -1);
        lua_pushnumber(L, (lua_Number)m_iNextIndex++);
        lua_rawset(L, -4);                      // stack: mt, key

        uint8_t iType = PERSIST_TPROTOTYPE;
        writeByteStream(&iType, 1);

        // Write upvalue names
        writeVUInt(pProtoInfo->nups);
        for(int i = 1; i <= pProtoInfo->nups; ++i)
        {
            // lua_getupvalue pushes the upvalue's value and returns its name;
            // the name is then pushed as a string so that it can be written.
            lua_pushstring(L, lua_getupvalue(L, iInstanceIndex, i));
            writeStackObject(-1);
            lua_pop(L, 2);
        }

        // Write the function's persist name
        lua_rawgeti(L, -2, 1);                  // stack: mt, key, mt[1]
        lua_replace(L, -3);                     // stack: mt[1], key
        lua_rawget(L, -2);                      // stack: mt[1], mt[1][key]
        if(lua_isnil(L, -1))
        {
            setError(lua_pushfstring(L, "Lua functions must be given a unique "
                "persistable name in order to be persisted (attempt to persist"
                " %s:%d)", pProtoInfo->source + 1, pProtoInfo->linedefined));
            // Three values are on the stack here (mt[1], nil, message); pop
            // all of them so that the error path is balanced like the others.
            lua_pop(L, 3);
            return;
        }
        writeStackObject(-1);
        lua_pop(L, 2);
    }
Пример #3
0
    /**
     * Write the full representation of a not-yet-written complex object.
     *
     * Stack on entry: index 1 is the table used as the object -> index cache
     * and index 2 is the object to write. Upvalue 1 is the permanents table
     * and upvalue 2 is self (the value placed at index 1 before any call to
     * writeStackObject).
     * NOTE(review): this looks like it is invoked as an __index-style handler
     * on the cache table (see the lua_gettable in writeStackObject) - confirm.
     *
     * The object is assigned the next index in the cache, and is then written
     * either as a permanent reference or according to its Lua type (string,
     * table, Lua function, userdata). Unsupported values (C functions, other
     * types) are reported through setError.
     *
     * @return Always 1: a single result, the number 0, is pushed.
     */
    int writeObjectRaw()
    {
        lua_State *L = m_L;
        uint8_t iType;

        // Save the index to the cache
        lua_pushvalue(L, 2);
        lua_pushnumber(L, (lua_Number)(m_iNextIndex++));
        lua_settable(L, 1);

        // Lookup the object in the permanents table
        lua_pushvalue(L, 2);
        lua_gettable(L, luaT_upvalueindex(1));
        if(lua_type(L, -1) != LUA_TNIL)
        {
            // Object is in the permanents table.

            iType = PERSIST_TPERMANENT;
            writeByteStream(&iType, 1);

            // Replace self's environment with self (for call to writeStackObject)
            lua_pushvalue(L, luaT_upvalueindex(2));
            lua_replace(L, 1);

            // Write the key corresponding to the permanent object
            writeStackObject(-1);
        }
        else
        {
            // Object is not in the permanents table.
            lua_pop(L, 1);

            switch(lua_type(L, 2))
            {
            // LUA_TNIL handled in writeStackObject
            // LUA_TBOOLEAN handled in writeStackObject
            // LUA_TNUMBER handled in writeStackObject

            case LUA_TSTRING: {
                iType = LUA_TSTRING;
                writeByteStream(&iType, 1);
                // Strings are simple: length and then bytes (not null terminated)
                size_t iLength;
                const char *sString = lua_tolstring(L, 2, &iLength);
                writeVUInt(iLength);
                writeByteStream(reinterpret_cast<const uint8_t*>(sString), iLength);
                break; }

            case LUA_TTABLE: {
                // Replace self's environment with self (for calls to writeStackObject)
                lua_pushvalue(L, luaT_upvalueindex(2));
                lua_replace(L, 1);

                // Save env and insert prior to table
                // (stack is now: 1 = self, 2 = env, 3 = table being written)
                lua_getfenv(L, 1);
                lua_insert(L, 2);

                // iTable is the stack index of the table currently being
                // written. Nested tables are handled by jumping back to
                // table_reentry with iTable advanced, rather than by recursion.
                int iTable = 3; table_reentry:

                // Handle the metatable
                if(lua_getmetatable(L, iTable))
                {
                    iType = PERSIST_TTABLE_WITH_META;
                    writeByteStream(&iType, 1);
                    writeStackObject(-1);
                    lua_pop(L, 1);
                }
                else
                {
                    iType = LUA_TTABLE;
                    writeByteStream(&iType, 1);
                }

                // Write the children as key, value pairs
                lua_pushnil(L);
                while(lua_next(L, iTable))
                {
                    writeStackObject(-2);
                    // The naive thing to do now would be writeStackObject(-1)
                    // but this can easily lead to Lua's C call stack limit
                    // being hit. To reduce the likelihood of this happening,
                    // we check to see if about to write another table.
                    if(lua_type(L, -1) == LUA_TTABLE)
                    {
                        // Look the value up in the cache (env, raw) and in
                        // the permanents table.
                        lua_pushvalue(L, -1);
                        lua_rawget(L, 2);
                        lua_pushvalue(L, -2);
                        lua_gettable(L, luaT_upvalueindex(1));
                        if(lua_isnil(L, -1) && lua_isnil(L, -2))
                        {
                            // Neither cached nor permanent: write the nested
                            // table inline. The value sits two slots above
                            // the current table (table, key, value).
                            lua_pop(L, 2);
                            lua_checkstack(L, 10);
                            iTable += 2;
                            lua_pushvalue(L, iTable);
                            lua_pushnumber(L, (lua_Number)(m_iNextIndex++));
                            lua_settable(L, 2);
                            goto table_reentry; table_resume:
                            // Back from the nested table; continue with the
                            // enclosing table.
                            iTable -= 2;
                        }
                        else
                        {
                            lua_pop(L, 2);
                            writeStackObject(-1);
                        }
                    }
                    else
                        writeStackObject(-1);
                    lua_pop(L, 1);
                }

                // Write a nil to mark the end of the children (as nil is the
                // only value which cannot be used as a key in a table).
                iType = LUA_TNIL;
                writeByteStream(&iType, 1);
                // A nested table has just been finished if iTable is not the
                // outermost table; resume its parent's iteration.
                if(iTable != 3)
                    goto table_resume;
                break; }

            case LUA_TFUNCTION:
                if(lua_iscfunction(L, 2))
                {
                    setErrorObject(2);
                    setError("Cannot persist C functions");
                }
                else
                {
                    iType = LUA_TFUNCTION;
                    writeByteStream(&iType, 1);

                    // Replace self's environment with self (for calls to writeStackObject)
                    lua_pushvalue(L, luaT_upvalueindex(2));
                    lua_replace(L, 1);

                    // Write the prototype (the part of a function which is common across
                    // multiple closures - see LClosure / Proto in Lua's lobject.h).
                    lua_Debug proto_info;
                    lua_pushvalue(L, 2);
                    lua_getinfo(L, ">Su", &proto_info);
                    writePrototype(&proto_info, 2);

                    // Write the values of the upvalues
                    // If available, also write the upvalue IDs (so that in
                    // the future, we could hypothetically rejoin shared
                    // upvalues). An ID is just an opaque sequence of bytes.
                    writeVUInt(proto_info.nups);
#if LUA_VERSION_NUM >= 502
                    writeVUInt(sizeof(void*));
#else
                    writeVUInt(0);
#endif
                    for(int i = 1; i <= proto_info.nups; ++i)
                    {
                        lua_getupvalue(L, 2, i);
                        writeStackObject(-1);
                        // Pop the upvalue's value again, so that the stack
                        // does not grow by one slot per upvalue.
                        lua_pop(L, 1);
#if LUA_VERSION_NUM >= 502
                        void *pUpvalueID = lua_upvalueid(L, 2, i);
                        writeByteStream((uint8_t*)&pUpvalueID, sizeof(void*));
#endif
                    }

                    // Write the environment table
                    lua_getfenv(L, 2);
                    writeStackObject(-1);
                    lua_pop(L, 1);
                }
                break;

            case LUA_TUSERDATA:
                if(!_checkThatUserdataCanBeDepersisted(2))
                    break;

                // Replace self's environment with self (for calls to writeStackObject)
                lua_pushvalue(L, luaT_upvalueindex(2));
                lua_replace(L, 1);

                // Write type, metatable, and then environment
                // NOTE(review): the value written by the first
                // writeStackObject(-1) is presumably the metatable left on
                // the stack by _checkThatUserdataCanBeDepersisted - confirm.
                iType = LUA_TUSERDATA;
                writeByteStream(&iType, 1);
                writeStackObject(-1);
                lua_getfenv(L, 2);
                writeStackObject(-1);
                lua_pop(L, 1);

                // Write the raw data
                if(lua_type(L, -1) == LUA_TTABLE)
                {
                    lua_getfield(L, -1, "__persist");
                    if(lua_isnil(L, -1))
                        lua_pop(L, 1);
                    else
                    {
                        // Call __persist(userdata, self)
                        lua_pushvalue(L, 2);
                        lua_pushvalue(L, luaT_upvalueindex(2));
                        lua_call(L, 2, 0);
                    }
                }
                writeVUInt((uint64_t)0x42); // sync marker
                break;

            default:
                setError(lua_pushfstring(L, "Cannot persist %s values", luaL_typename(L, 2)));
                break;
            }
        }
        lua_pushnumber(L, 0);
        return 1;
    }
Пример #4
0
    /**
     * Write the value at the given stack index to the output stream.
     *
     * nil / none, booleans and numbers are written by value. Anything else is
     * looked up in the environment table of the value at stack index 1; a
     * non-zero lookup result is written as a back-reference.
     *
     * @param iIndex Stack index of the value; may be relative (negative).
     */
    virtual void writeStackObject(int iIndex)
    {
        lua_State *L = m_L;

        // Relative indices are made absolute; pseudo-indices are left alone
        if(LUA_REGISTRYINDEX < iIndex && iIndex < 0)
            iIndex += lua_gettop(L) + 1;

        switch(lua_type(L, iIndex))
        {
        case LUA_TNONE:
        case LUA_TNIL: {
            uint8_t iTag = LUA_TNIL;
            writeByteStream(&iTag, 1);
            return; }

        case LUA_TBOOLEAN: {
            // false is written as the plain boolean type byte, true gets its
            // own dedicated byte
            uint8_t iTag = LUA_TBOOLEAN;
            if(lua_toboolean(L, iIndex))
                iTag = PERSIST_TTRUE;
            writeByteStream(&iTag, 1);
            return; }

        case LUA_TNUMBER: {
            double fNumber = lua_tonumber(L, iIndex);
            // 16383 = 2^14-1 is the largest value which fits into two bytes
            // of VUInt, so whole numbers up to there get the compact form
            bool bCompact = fNumber >= 0.0 && fNumber <= 16383.0
                && floor(fNumber) == fNumber;
            if(!bCompact)
            {
                // Everything else is stored as the raw 8 byte double
                uint8_t iTag = LUA_TNUMBER;
                writeByteStream(&iTag, 1);
                writeByteStream(reinterpret_cast<uint8_t*>(&fNumber), sizeof(double));
                return;
            }
            uint8_t iTag = PERSIST_TINTEGER;
            writeByteStream(&iTag, 1);
            uint16_t iSmall = (uint16_t)fNumber;
            writeVUInt(iSmall);
            return; }

        default:
            break;
        }

        // Complex values go through the cache so that each is written only
        // once (which is also what keeps cyclic object graphs from breaking
        // things).
        lua_getfenv(L, 1);
        lua_pushvalue(L, iIndex);
        lua_gettable(L, -2); // Might (indirectly) call writeObjectRaw
        uint64_t iCached = (uint64_t)lua_tonumber(L, -1);
        lua_pop(L, 2);

        // A result of 0 means that the value had not been written before:
        // writeObjectRaw ran and has already emitted the data, so there is
        // nothing left to do.
        if(iCached == 0)
            return;

        // Otherwise the result is the object's index, which is written offset
        // by the number of types.
        writeVUInt(iCached + PERSIST_TCOUNT - 1);
    }
Пример #5
0
    /**
     * Write the value at the given stack index, with a direct path for
     * userdata which has not been seen before.
     *
     * Non-userdata values, and userdata which is already present in either
     * lookup table, are forwarded to writeStackObject. Otherwise the userdata
     * is given the next index in the cache and written here directly: type
     * byte, the value left on the stack by _checkThatUserdataCanBeDepersisted,
     * the userdata's environment, the output of its __persist handler (if
     * any), and a sync marker.
     *
     * @param iIndex Stack index of the value; may be relative (negative).
     */
    virtual void fastWriteStackObject(int iIndex)
    {
        lua_State *L = m_L;

        if(lua_type(L, iIndex) != LUA_TUSERDATA)
        {
            writeStackObject(iIndex);
            return;
        }

        // Convert index from relative to absolute
        if(iIndex < 0 && iIndex > LUA_REGISTRYINDEX)
            iIndex = lua_gettop(L) + 1 + iIndex;

        // Check for no cycle
        // Two lookups are made with the userdata as key: a raw lookup in the
        // environment table of the value at index 1 (env), and a regular
        // lookup in the table stored at env[1].
        lua_getfenv(L, 1);
        lua_pushvalue(L, iIndex);
        lua_rawget(L, -2);
        lua_rawgeti(L, -2, 1);
        lua_pushvalue(L, iIndex);
        lua_gettable(L, -2);
        lua_replace(L, -2);
        // Stack is now: env, env[userdata], env[1][userdata]
        if(!lua_isnil(L, -1) || !lua_isnil(L, -2))
        {
            // Found in at least one of the tables: take the general path.
            lua_pop(L, 3);
            writeStackObject(iIndex);
            return;
        }
        lua_pop(L, 2);

        // Save the index to the cache
        lua_pushvalue(L, iIndex);
        lua_pushnumber(L, (lua_Number)(m_iNextIndex++));
        lua_settable(L, -3);

        // NOTE(review): env is still on the stack from here on, and none of
        // the pops below appear to remove it (assuming the helper called next
        // leaves exactly one value on success) - confirm whether the caller
        // relies on this or whether the stack is left unbalanced.
        if(!_checkThatUserdataCanBeDepersisted(iIndex))
            return;

        // Write type, metatable, and then environment
        uint8_t iType = LUA_TUSERDATA;
        writeByteStream(&iType, 1);
        writeStackObject(-1);
        lua_getfenv(L, iIndex);
        writeStackObject(-1);
        lua_pop(L, 1);

        // Write the raw data
        if(lua_type(L, -1) == LUA_TTABLE)
        {
            lua_getfield(L, -1, "__persist");
            if(lua_isnil(L, -1))
                lua_pop(L, 1);
            else
            {
                // The handler is invoked directly as a C function on the
                // current stack, rather than through lua_call.
                // NOTE(review): lua_tocfunction returns NULL if __persist is
                // not a C function, which is not checked here - confirm that
                // all __persist handlers reaching this path are C functions.
                lua_pushvalue(L, iIndex);
                lua_checkstack(L, 20);
                lua_CFunction fn = lua_tocfunction(L, -2);
                fn(L);
                lua_pop(L, 2);
            }
        }
        writeVUInt((uint64_t)0x42); // sync marker
        lua_pop(L, 1);
    }
Пример #6
0
/*
 * Consumes one small-side RGData.
 *
 * In file mode the row group is serialized and handed to writeByteStream(0, ...),
 * the hash table size estimate is advanced, and the partition is converted to
 * split mode once the estimate exceeds htTargetSize.  Otherwise every row is
 * passed to a child bucket via insertSmallSideRow(): rows with a NULL join
 * column under antiWithMatchNulls go to every bucket (subject to
 * needsAllNullRows / gotNullRow), all other rows go to the bucket chosen by
 * the hash of their join key.
 *
 * Returns the sum reported by writeByteStream() / convertToSplitMode() /
 * insertSmallSideRow(); the same amount is added to smallSizeOnDisk.
 */
int64_t JoinPartition::processSmallBuffer(RGData &rgData)
{
	RowGroup &rowGroup = smallRG;
	Row &current = smallRow;
	int64_t total = 0;

	rowGroup.setData(&rgData);

	if (fileMode) {
		ByteStream serialized;

		rowGroup.serializeRGData(serialized);
		total = writeByteStream(0, serialized);

		/* Check whether this partition is now too big -> convert to split mode.

		The current estimate is based on 100M 4-byte rows = 4GB.  The total size is
		the amount stored in RowGroups in mem + the size of the hash table.  The RowGroups
		in that case use 600MB, so 3.4GB is used by the hash table.  3.4GB/100M rows = 34 bytes/row
		*/
		htSizeEstimate += rowGroup.getDataSize() + (34 * rowGroup.getRowCount());
		if (htSizeEstimate > htTargetSize)
			total += convertToSplitMode();
	}
	else {
		for (int rowIdx = 0; rowIdx < (int) rowGroup.getRowCount(); rowIdx++) {
			rowGroup.getRow(rowIdx, &current);

			if (antiWithMatchNulls && hasNullJoinColumn(current)) {
				/* NULL-key row: replicate it into every bucket, but only once
				unless every such row is needed. */
				if (needsAllNullRows || !gotNullRow) {
					for (int bucketIdx = 0; bucketIdx < (int) bucketCount; bucketIdx++)
						total += buckets[bucketIdx]->insertSmallSideRow(current);
					gotNullRow = true;
				}
			}
			else {
				uint64_t bucketIdx;

				if (!typelessJoin) {
					/* Single-column key: hash its 8-byte integer value. */
					uint64_t keyValue;

					if (UNLIKELY(current.isUnsigned(smallKeyCols[0])))
						keyValue = current.getUintField(smallKeyCols[0]);
					else
						keyValue = current.getIntField(smallKeyCols[0]);
					bucketIdx = hasher((char *) &keyValue, 8, hashSeed);
					bucketIdx = hasher.finalize(bucketIdx, 8) % bucketCount;
				}
				else
					bucketIdx = getHashOfTypelessKey(current, smallKeyCols, hashSeed) % bucketCount;

				total += buckets[bucketIdx]->insertSmallSideRow(current);
			}
		}
	}

	smallSizeOnDisk += total;
	return total;
}