Пример #1
0
/**
 * Hands out the next leaf partition reachable from this node.
 *
 * In file mode this node is itself a leaf: on the first call the whole
 * small-side file is read back and appended to *smallData, *partitionID is
 * set to uniqueID and *jp to this node. Every later call returns false.
 *
 * Otherwise the call is forwarded to the child buckets in order, resuming at
 * nextPartitionToReturn and moving on to the next child each time one reports
 * that it has nothing left.
 *
 * @param smallData   receives the small-side RGData of the partition returned
 * @param partitionID receives the uniqueID of the partition returned
 * @param jp          receives a pointer to the partition returned
 * @return true if a partition was produced, false once all are exhausted
 */
bool JoinPartition::getNextPartition(vector<RGData> *smallData, uint64_t *partitionID, JoinPartition **jp)
{
	if (!fileMode) {
		// Walk the children; a child that still has a partition to give ends
		// the search without advancing, so the next call asks it again.
		for (; nextPartitionToReturn < bucketCount; nextPartitionToReturn++)
			if (buckets[nextPartitionToReturn]->getNextPartition(smallData, partitionID, jp))
				return true;
		return false;
	}

	ByteStream serialized;
	RGData chunk;

	// A leaf is handed out only once.
	if (nextPartitionToReturn > 0)
		return false;

	// Rewind and read the small side until an empty stream comes back.
	nextSmallOffset = 0;
	for (readByteStream(0, &serialized); serialized.length() != 0; readByteStream(0, &serialized)) {
		chunk.deserialize(serialized);
		smallData->push_back(chunk);
	}

	nextPartitionToReturn = 1;
	*partitionID = uniqueID;
	*jp = this;
	return true;
}
/**
 * Runs the UPDATE's execution plan through ExeMgr and forwards every row
 * group that comes back to processRowgroup().
 *
 * Visible flow:
 *   1. Send a 4-byte code (qb = 4) followed by the serialized execution plan.
 *   2. Read the 4-byte status reply; anything other than a 4-byte zero is an
 *      ExeMgr error (999 is reported when the reply has the wrong length).
 *   3. Read one more message and extract a string from it (emsgStr).
 *   4. Loop over the row-group messages: the first one is metadata, the rest
 *      carry row data; an empty row group ends the stream.
 *   5. On a pending rollback, mark the job canceled and drain the responses.
 *   6. If no error occurred, request and read the query statistics.
 *
 * @param cpackage DML package supplying the execution plan and transaction id
 * @param result   receives the status, error message and statistics
 * @param uniqueId id passed through to processRowgroup() and receiveAll()
 * @param tableOid table OID passed through to receiveAll()
 * @return the early-error paths return rowsProcessed, which is still 0 there
 */
uint64_t UpdatePackageProcessor::fixUpRows(dmlpackage::CalpontDMLPackage& cpackage, DMLResult& result,
        const uint64_t uniqueId, const uint32_t tableOid)
{
    ByteStream msg, msgBk, emsgBs;
    RGData rgData;
    // Presumably the request code announcing that an execution plan follows -- confirm.
    uint32_t qb = 4;
    msg << qb;
    // Stays empty until the metadata message has been received.
    boost::scoped_ptr<rowgroup::RowGroup> rowGroup;
    uint64_t rowsProcessed = 0;
    uint32_t dbroot = 1;
    bool metaData = false;
    oam::OamCache* oamCache = oam::OamCache::makeOamCache();
    std::vector<int> fPMs = oamCache->getModuleIds();
    std::map<unsigned, bool> pmState;
    string emsg;
    string emsgStr;
    bool err = false;

    //boost::scoped_ptr<messageqcpp::MessageQueueClient> fExeMgr;
    //fExeMgr.reset( new messageqcpp::MessageQueueClient("ExeMgr1"));
    try
    {

        // Every module id starts out flagged true in pmState.
        for (unsigned i = 0; i < fPMs.size(); i++)
        {
            pmState[fPMs[i]] = true;
        }

        //timer.start("ExeMgr");
        fExeMgr->write(msg);
        fExeMgr->write(*(cpackage.get_ExecutionPlan()));
        //cout << "sending to ExeMgr plan with length " << (cpackage.get_ExecutionPlan())->length() << endl;
        msg.restart();
        emsgBs.restart();
        msg = fExeMgr->read(); //error handling

        // The status reply must be exactly 4 bytes and carry the value 0.
        if (msg.length() == 4)
        {
            msg >> qb;

            if (qb != 0)
                err = true;
        }
        else
        {
            // Wrong-sized reply: report 999 in place of a status code.
            qb = 999;
            err = true;
        }

        if (err)
        {
            logging::Message::Args args;
            logging::Message message(2);
            args.add("Update Failed: ExeMgr Error");
            args.add((int)qb);
            message.format(args);
            result.result = UPDATE_ERROR;
            result.message = message;
            //timer.finish();
            return rowsProcessed;
        }

        emsgBs = fExeMgr->read();

        // An empty message is treated as a dropped connection.
        if (emsgBs.length() == 0)
        {
            logging::Message::Args args;
            logging::Message message(2);
            args.add("Update Failed: ");
            args.add("Lost connection to ExeMgr");
            message.format(args);
            result.result = UPDATE_ERROR;
            result.message = message;
            //timer.finish();
            return rowsProcessed;
        }

        emsgBs >> emsgStr;

        // Row-group loop: left via break on rollback, lost connection, error,
        // or the empty row group that marks the end of the stream.
        while (true)
        {
            if (fRollbackPending)
            {
                break;
            }

            msg.restart();
            msgBk.restart();
            msg = fExeMgr->read();
            // Untouched copy of the message; this is what processRowgroup() receives.
            msgBk = msg;

            if ( msg.length() == 0 )
            {
                // NOTE(review): err is not set on this path, so the statistics
                // request after the loop still runs -- confirm that is intended.
                cerr << "UpdatePackageProcessor::processPackage::fixupRows" << endl;
                logging::Message::Args args;
                logging::Message message(2);
                args.add("Update Failed: ");
                args.add("Lost connection to ExeMgr");
                message.format(args);
                result.result = UPDATE_ERROR;
                result.message = message;
                //timer.finish();
                //return rowsProcessed;
                break;
            }
            else
            {
                if (rowGroup.get() == NULL)
                {
                    // This is metadata; it needs to be sent to all PMs.
                    metaData = true;
                    //cout << "sending meta data" << endl;
                    //timer.start("Meta");
                    // NOTE(review): err is assigned here but not checked before the
                    // continue below; a later assignment overwrites it.
                    err = processRowgroup(msgBk, result, uniqueId, cpackage, pmState, metaData, dbroot);
                    rowGroup.reset(new rowgroup::RowGroup());
                    rowGroup->deserialize(msg);
                    // Reply with code 100 -- presumably asks ExeMgr to start
                    // sending row data; confirm.
                    qb = 100;
                    msg.restart();
                    msg << qb;
                    fExeMgr->write(msg);
                    metaData = false;
                    //timer.stop("Meta");
                    continue;
                }

                rgData.deserialize(msg, true);
                rowGroup->setData(&rgData);
                //rowGroup->setData(const_cast<uint8_t*>(msg.buf()));
                // A non-zero row group status signals an error.
                err = (rowGroup->getStatus() != 0);

                if (err)
                {
                    //msgBk.advance(rowGroup->getDataSize());
                    // The error text follows the row data in the same message.
                    string errorMsg;
                    msg >> errorMsg;
                    logging::Message::Args args;
                    logging::Message message(2);
                    args.add("Update Failed: ");
                    args.add(errorMsg);
                    message.format(args);
                    result.result = UPDATE_ERROR;
                    result.message = message;
                    // Collect the outstanding responses into a throwaway result so
                    // the error already stored in `result` is kept.
                    DMLResult tmpResult;
                    receiveAll( tmpResult, uniqueId, fPMs, pmState, tableOid);
                    /*					qb = 100;
                    					//@Bug 4358 get rid of broken pipe error.
                    					msg.restart();
                    					msg << qb;
                    					fExeMgr->write(msg);
                    */					//timer.finish();
                    //return rowsProcessed;
                    //err = true;
                    break;
                }

                if (rowGroup->getRGData() == NULL)
                {
                    msg.restart();
                }

                if (rowGroup->getRowCount() == 0)  //done fetching
                {
                    //timer.finish();
                    // Need to receive all responses.
                    err = receiveAll( result, uniqueId, fPMs, pmState, tableOid);
                    //return rowsProcessed;
                    break;
                }

                if (rowGroup->getBaseRid() == (uint64_t) (-1))
                {
                    continue;  // @bug4247, not valid row ids, may from small side outer
                }

                dbroot = rowGroup->getDBRoot();
                //cout << "dbroot in the rowgroup is " << dbroot << endl;
                //timer.start("processRowgroup");
                err = processRowgroup(msgBk, result, uniqueId, cpackage, pmState, metaData, dbroot);

                //timer.stop("processRowgroup");
                if (err)
                {
                    //timer.finish();
                    // Log before and after draining the outstanding responses.
                    LoggingID logid( DMLLoggingId, fSessionID, cpackage.get_TxnID());
                    logging::Message::Args args1;
                    logging::Message msg1(1);
                    args1.add("SQL statement erroring out, need to receive all messages from WES");
                    msg1.format( args1 );
                    logging::Logger logger(logid.fSubsysID);
                    logger.logMessage(LOG_TYPE_DEBUG, msg1, logid);
                    DMLResult tmpResult;
                    receiveAll( tmpResult, uniqueId, fPMs, pmState, tableOid);
                    logging::Message::Args args2;
                    logging::Message msg2(1);
                    args2.add("SQL statement erroring out, received all messages from WES");
                    msg2.format( args2 );
                    logger.logMessage(LOG_TYPE_DEBUG, msg2, logid);
                    //@Bug 4358 get rid of broken pipe error.
                    /*					msg.restart();
                    					msg << qb;
                    					fExeMgr->write(msg);
                    					return rowsProcessed;
                    */
                    //err = true;
                    break;
                }

                // Only row groups that processRowgroup() accepted are counted.
                rowsProcessed += rowGroup->getRowCount();
            }
        }

        if (fRollbackPending)
        {
            // Setting err suppresses the statistics request below.
            err = true;
            // Response to user
            cerr << "UpdatePackageProcessor::processPackage::fixupRows Rollback Pending" << endl;
            //@Bug 4994 Cancelled job is not error
            result.result = JOB_CANCELED;

            // Log
            LoggingID logid( DMLLoggingId, fSessionID, cpackage.get_TxnID());
            logging::Message::Args args1;
            logging::Message msg1(1);
            args1.add("SQL statement canceled by user");
            msg1.format( args1 );
            logging::Logger logger(logid.fSubsysID);
            logger.logMessage(LOG_TYPE_DEBUG, msg1, logid);

            // Clean out the pipe;
            DMLResult tmpResult;
            receiveAll( tmpResult, uniqueId, fPMs, pmState, tableOid);
        }

        // get stats from ExeMgr
        if (!err)
        {
            // Code 3 requests the statistics; the reply holds three stats
            // fields followed by the serialized stats object.
            qb = 3;
            msg.restart();
            msg << qb;
            fExeMgr->write(msg);
            msg = fExeMgr->read();
            msg >> result.queryStats;
            msg >> result.extendedStats;
            msg >> result.miniStats;
            result.stats.unserialize(msg);
        }
Пример #3
0
/**
 * Turns this partition from a single on-disk file (file mode) into a node
 * that fans out to bucketCount child partitions.
 *
 * Every row group stored in the small-side file is read back and each row is
 * routed to the child chosen by hashing its join key. When antiWithMatchNulls
 * is set, a row with a NULL join column is instead inserted into every child
 * (only the first such row unless needsAllNullRows is set) and is not counted
 * in the distribution statistics. Afterwards the small-side file is removed.
 *
 * @return a running total that starts at minus the previous smallSizeOnDisk
 *         and adds whatever each insertSmallSideRow() call returns
 *         (presumably the net change in bytes on disk -- confirm).
 * @throws IDBExcept (ERR_DBJ_DATA_DISTRIBUTION) when a single bucket received
 *         every hashed row, i.e. splitting did not spread the data.
 */
int64_t JoinPartition::convertToSplitMode()
{
	ByteStream bs;
	RGData rgData;
	uint32_t hash;
	uint64_t tmp;
	int64_t ret = -(int64_t)smallSizeOnDisk;    // smallFile gets deleted
	boost::scoped_array<uint32_t> rowDist(new uint32_t[bucketCount]);
	uint32_t rowCount = 0;

	memset(rowDist.get(), 0, sizeof(uint32_t) * bucketCount);
	fileMode = false;
	htSizeEstimate = 0;
	smallSizeOnDisk = 0;
	buckets.reserve(bucketCount);
	for (int i = 0; i < (int) bucketCount; i++)
		buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));

	RowGroup &rg = smallRG;
	Row &row = smallRow;
	nextSmallOffset = 0;
	while (1) {
		readByteStream(0, &bs);
		if (bs.length() == 0)
			break;
		rgData.deserialize(bs);
		rg.setData(&rgData);
		for (int rowIdx = 0; rowIdx < (int) rg.getRowCount(); rowIdx++) {
			rg.getRow(rowIdx, &row);

			if (antiWithMatchNulls && hasNullJoinColumn(row)) {
				if (needsAllNullRows || !gotNullRow) {
					// The bucket loop needs its own index. Sharing the row
					// index would leave it at bucketCount, so the row scan
					// would resume at the wrong row and skip or re-read rows
					// of this row group.
					for (int bucketIdx = 0; bucketIdx < (int) bucketCount; bucketIdx++)
						ret += buckets[bucketIdx]->insertSmallSideRow(row);
					gotNullRow = true;
				}
				continue;
			}

			if (typelessJoin)
				hash = getHashOfTypelessKey(row, smallKeyCols, hashSeed) % bucketCount;
			else {
				// Single-column key: hash the 8-byte integer value directly.
				if (UNLIKELY(row.isUnsigned(smallKeyCols[0])))
					tmp = row.getUintField(smallKeyCols[0]);
				else
					tmp = row.getIntField(smallKeyCols[0]);
				hash = hasher((char *) &tmp, 8, hashSeed);
				hash = hasher.finalize(hash, 8) % bucketCount;
			}
			rowCount++;
			rowDist[hash]++;
			ret += buckets[hash]->insertSmallSideRow(row);
		}
	}
	boost::filesystem::remove(smallFilename);
	smallFilename.clear();

	// If one bucket holds every hashed row, splitting achieved nothing.
	// NOTE(review): this also fires when rowCount is 0 (no hashed rows at
	// all) -- confirm that is the intended outcome.
	for (int i = 0; i < (int) bucketCount; i++)
		if (rowDist[i] == rowCount)
			throw IDBExcept("All rows hashed to the same bucket", ERR_DBJ_DATA_DISTRIBUTION);

	// Point the shared small-side row group back at `buffer` and reset it,
	// since it was left referring to the local rgData above.
	rg.setData(&buffer);
	rg.resetRowGroup(0);
	rg.getRow(0, &row);

	return ret;
}