//------------------------------------------------------------------------------ // Send HWM and CP update information to BRM //------------------------------------------------------------------------------ int BRMReporter::sendHWMandCPToBRM( ) { int rc = NO_ERROR; if (fHWMInfo.size() > 0) { std::ostringstream oss; oss << "Committing " << fHWMInfo.size() << " HWM update(s) for table "<< fTableName << " to BRM"; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } if (fCPInfo.size() > 0) { std::ostringstream oss; oss << "Committing " << fCPInfo.size() << " CP update(s) for table " << fTableName << " to BRM"; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } if ((fHWMInfo.size() > 0) || (fCPInfo.size() > 0)) { rc = BRMWrapper::getInstance()->bulkSetHWMAndCP( fHWMInfo, fCPInfo ); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error updating BRM with HWM and CP data for table " << fTableName << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } } return rc; }
//------------------------------------------------------------------------------ // Close the current compressed Column file after first compressing/flushing // any remaining data, and re-writing the headers as well. //------------------------------------------------------------------------------ int ColumnInfoCompressed::closeColumnFile(bool bCompletingExtent,bool bAbort) { int rc = NO_ERROR; if ( curCol.dataFile.pFile ) { if (!bAbort) { // If we are opening and closing a file in order to add an extent as // part of preliminary block skipping, then we won't have a Column- // BufferManger object yet. One will be created when the file is // reopened to begin importing. if (fColBufferMgr) { rc = fColBufferMgr->finishFile( bCompletingExtent ); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error closing compressed file; OID-" << curCol.dataFile.fid << "; DBRoot-" << curCol.dataFile.fDbRoot << "; part-" << curCol.dataFile.fPartition << "; seg-" << curCol.dataFile.fSegment << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); bAbort = true; } } } ColumnInfo::closeColumnFile(bCompletingExtent, bAbort); } return rc; }
//------------------------------------------------------------------------------ // Main entry point into the cpimport.bin program //------------------------------------------------------------------------------ int main(int argc, char **argv) { #ifdef _MSC_VER _setmaxstdio(2048); #else setuid( 0 ); // set effective ID to root; ignore return status #endif setupSignalHandlers(); // Set up LOCALE - BUG 5362 std::string systemLang("C"); systemLang = funcexp::utf8::idb_setlocale(); // Initialize singleton instance of syslogging if (argc > 0) pgmName = argv[0]; logging::IDBErrorInfo::instance(); SimpleSysLog::instance()->setLoggingID( logging::LoggingID(SUBSYSTEM_ID_WE_BULK) ); // Log job initiation unless user is asking for help std::ostringstream ossArgList; bool bHelpFlag = false; for (int m=1; m<argc; m++) { if (strcmp(argv[m],"-h") == 0) { bHelpFlag = true; break; } if (!strcmp(argv[m],"\t")) // special case to print a <TAB> ossArgList << "'\\t'" << ' '; else ossArgList << argv[m] << ' '; } if (!bHelpFlag) { logInitiateMsg( ossArgList.str().c_str() ); } BulkLoad curJob; string sJobIdStr; string sXMLJobDir; string sModuleIDandPID; bool bLogInfo2ToConsole = false; bool bValidateColumnList= true; bool bRollback = false; bool bForce = false; int rc = NO_ERROR; std::string exceptionMsg; TASK task; // track tasks being performed try { //-------------------------------------------------------------------------- // Parse the command line arguments //-------------------------------------------------------------------------- task = TASK_CMD_LINE_PARSING; string xmlGenSchema; string xmlGenTable; parseCmdLineArgs( argc, argv, curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole, xmlGenSchema, xmlGenTable, bValidateColumnList ); //-------------------------------------------------------------------------- // Save basename portion of program path from argv[0] //-------------------------------------------------------------------------- string base; string::size_type startBase = string(argv[0]).rfind('/'); if (startBase == string::npos) base.assign( argv[0] ); else base.assign( argv[0]+startBase+1 ); curJob.setProcessName( base ); if (bDebug) logInitiateMsg( "Command line arguments parsed" ); //-------------------------------------------------------------------------- // Init singleton classes (other than syslogging that we already setup) //-------------------------------------------------------------------------- task = TASK_INIT_CONFIG_CACHE; // Initialize cache used to store configuration parms from Calpont.xml Config::initConfigCache(); // Setup signal handlers "again" because HDFS plugin seems to be // changing our settings to ignore ctrl-C and sigterm setupSignalHandlers(); // initialize singleton BRM Wrapper. Also init ExtentRows (in dbrm) from // main thread, since ExtentMap::getExtentRows is not thread safe. BRMWrapper::getInstance()->getInstance()->getExtentRows(); //-------------------------------------------------------------------------- // Validate running on valid node //-------------------------------------------------------------------------- verifyNode( ); //-------------------------------------------------------------------------- // Set scheduling priority for this cpimport.bin process //-------------------------------------------------------------------------- #ifdef _MSC_VER //FIXME #else setpriority( PRIO_PROCESS, 0, Config::getBulkProcessPriority() ); #endif if (bDebug) logInitiateMsg( "Config cache initialized" ); //-------------------------------------------------------------------------- // Make sure DMLProc startup has completed before running a cpimport.bin job //-------------------------------------------------------------------------- task = TASK_BRM_STATE_READY; if (!BRMWrapper::getInstance()->isSystemReady()) { startupError( std::string( "System is not ready. Verify that InfiniDB is up and ready " "before running cpimport."), false ); } if (bDebug) logInitiateMsg( "BRM state verified: state is Ready" ); //-------------------------------------------------------------------------- // Verify that the state of BRM is read/write //-------------------------------------------------------------------------- task = TASK_BRM_STATE_READ_WRITE; int brmReadWriteStatus = BRMWrapper::getInstance()->isReadWrite(); if (brmReadWriteStatus != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << ec.errorString(brmReadWriteStatus) << " cpimport.bin is terminating."; startupError( oss.str(), false ); } if (bDebug) logInitiateMsg( "BRM state is Read/Write" ); //-------------------------------------------------------------------------- // Make sure we're not about to shutdown //-------------------------------------------------------------------------- task = TASK_SHUTDOWN_PENDING; int brmShutdownPending = BRMWrapper::getInstance()->isShutdownPending( bRollback, bForce); if (brmShutdownPending != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << ec.errorString(brmShutdownPending) << " cpimport.bin is terminating."; startupError( oss.str(), false ); } if (bDebug) logInitiateMsg( "Verified no shutdown operation is pending" ); //-------------------------------------------------------------------------- // Make sure we're not write suspended //-------------------------------------------------------------------------- task = TASK_SUSPEND_PENDING; int brmSuspendPending = BRMWrapper::getInstance()->isSuspendPending(); if (brmSuspendPending != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << ec.errorString(brmSuspendPending) << " cpimport.bin is terminating."; startupError( oss.str(), false ); } if (bDebug) logInitiateMsg( "Verified no suspend operation is pending" ); //-------------------------------------------------------------------------- // Set some flags //-------------------------------------------------------------------------- task = TASK_ESTABLISH_JOBFILE; BRMWrapper::setUseVb( false ); Cache::setUseCache ( false ); //-------------------------------------------------------------------------- // Construct temporary Job XML file if user provided schema, job, and // optional load filename. //-------------------------------------------------------------------------- boost::filesystem::path sFileName; bool bUseTempJobFile = false; cout << std::endl; // print blank line before we start // Start tracking time to create/load jobfile; // The elapsed time for this step is logged at the end of loadJobInfo() curJob.startTimer(); if (!xmlGenSchema.empty()) // create temporary job file name { // If JobID is not provided, then default to the table OID if (sJobIdStr.empty()) { std::string tableOIDStr; getTableOID(xmlGenSchema, xmlGenTable, tableOIDStr); cout << "Using table OID " << tableOIDStr << " as the default JOB ID" << std::endl; sJobIdStr = tableOIDStr; } // No need to validate column list in job XML file for user errors, // if cpimport.bin just generated the job XML file on-the-fly. bValidateColumnList = false; bUseTempJobFile = true; constructTempXmlFile(curJob.getTempJobDir(), sJobIdStr, xmlGenSchema, xmlGenTable, curJob.getAlternateImportDir(), sFileName); } else // create user's persistent job file name { // Construct the job description file name std::string xmlErrMsg; rc = XMLJob::genJobXMLFileName( sXMLJobDir, curJob.getJobDir(), sJobIdStr, bUseTempJobFile, std::string(), std::string(), sFileName, xmlErrMsg ); if (rc != NO_ERROR) { std::ostringstream oss; oss << "cpimport.bin error creating Job XML file name: " << xmlErrMsg; startupError( oss.str(), false ); } printInputSource( curJob.getAlternateImportDir(), sFileName.string() ); } if (bDebug) logInitiateMsg( "Job xml file is established" ); //------------------------------------------------------------------------- // Bug 5415 Add HDFS MemBuffer vs. FileBuffer decision logic. // MemoryCheckPercent. This controls at what percent of total memory be // consumed by all processes before we switch from HdfsRdwrMemBuffer to // HdfsRdwrFileBuffer. This is only used in Hdfs installations. //------------------------------------------------------------------------- config::Config* cf = config::Config::makeConfig(); int checkPct = 95; string strCheckPct = cf->getConfig("SystemConfig", "MemoryCheckPercent"); if ( strCheckPct.length() != 0 ) checkPct = cf->uFromText(strCheckPct); //-------------------------------------------------------------------------- // If we're HDFS, start the monitor thread. // Otherwise, we don't need it, so don't waste the resources. //-------------------------------------------------------------------------- if (idbdatafile::IDBPolicy::useHdfs()) { new boost::thread(utils::MonitorProcMem(0, checkPct, SUBSYSTEM_ID_WE_BULK)); } //-------------------------------------------------------------------------- // This is the real business //-------------------------------------------------------------------------- task = TASK_LOAD_JOBFILE; rc = curJob.loadJobInfo( sFileName.string(), bUseTempJobFile, systemLang, argc, argv, bLogInfo2ToConsole, bValidateColumnList ); if( rc != NO_ERROR ) { WErrorCodes ec; std::ostringstream oss; oss << "Error in loading job information; " << ec.errorString(rc) << "; cpimport.bin is terminating."; startupError( oss.str(), false ); } if (bDebug) logInitiateMsg( "Job xml file is loaded" ); task = TASK_PROCESS_DATA; // Log start of job to INFO log logging::Message::Args startMsgArgs; startMsgArgs.add(sJobIdStr); startMsgArgs.add(curJob.getSchema()); SimpleSysLog::instance()->logMsg( startMsgArgs, logging::LOG_TYPE_INFO, logging::M0081); curJob.printJob(); rc = curJob.processJob( ); if( rc != NO_ERROR ) cerr << endl << "Error in loading job data" << endl; } catch (std::exception& ex) { std::ostringstream oss; oss << "Uncaught exception caught in cpimport.bin main() while " << taskLabels[ task ] << "; " << ex.what(); exceptionMsg = oss.str(); if (task != TASK_PROCESS_DATA) { startupError( exceptionMsg, false ); } rc = ERR_UNKNOWN; } //-------------------------------------------------------------------------- // Log end of job to INFO log //-------------------------------------------------------------------------- logging::Message::Args endMsgArgs; endMsgArgs.add(sJobIdStr); if (rc != NO_ERROR) { std::string failMsg("FAILED"); if (exceptionMsg.length() > 0) { failMsg += "; "; failMsg += exceptionMsg; } endMsgArgs.add(failMsg.c_str()); } else { endMsgArgs.add("SUCCESS"); } SimpleSysLog::instance()->logMsg( endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082); if (rc != NO_ERROR) return ( EXIT_FAILURE ); else return ( EXIT_SUCCESS ); }
//------------------------------------------------------------------------------ // Allocates to-be-compressed buffer if it has not already been allocated. // Initializes to-be-compressed buffer with the contents of the chunk containing // the fStartingHwm block, as long as that chunk is in the pointer list. // If the chunk is not in the list, then we must be adding a new chunk, in // which case we just initialize an empty chunk. // Returns startFileOffset which indicates file offset (in bytes) where the // next chunk will be starting. //------------------------------------------------------------------------------ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) { bool bNewBuffer = false; // Lazy initialization of to-be-compressed buffer if (!fToBeCompressedBuffer) { fToBeCompressedBuffer = new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN]; BlockOp::setEmptyBuf( fToBeCompressedBuffer, IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); bNewBuffer = true; } // Find the chunk containing the starting HWM, as long as our initial // block skipping has not caused us to exit the HWM chunk; in which // case we start a new empty chunk. unsigned int chunkIndex = 0; unsigned int blockOffsetWithinChunk = 0; if (fPreLoadHWMChunk && (fChunkPtrs.size() > 0)) { fCompressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk); if (chunkIndex < fChunkPtrs.size()) startFileOffset = fChunkPtrs[chunkIndex].first; else fPreLoadHWMChunk = false; } else { fPreLoadHWMChunk = false; } // Preload (read and uncompress) the chunk for the starting HWM extent only if (fPreLoadHWMChunk) { fPreLoadHWMChunk = false; // only preload HWM chunk in the first extent std::ostringstream oss; oss << "Reading HWM chunk for: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; hwm-" << fStartingHwm << "; chunk#-" << chunkIndex << "; blkInChunk-" << blockOffsetWithinChunk; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); // Read the chunk RETURN_ON_ERROR( fColInfo->colOp->setFileOffset( fFile, startFileOffset, SEEK_SET) ); char* compressedOutBuf = new char[ fChunkPtrs[chunkIndex].second ]; boost::scoped_array<char> compressedOutBufPtr(compressedOutBuf); size_t itemsRead = fFile->read(compressedOutBuf, fChunkPtrs[chunkIndex].second) / fChunkPtrs[chunkIndex].second; if (itemsRead != 1) { std::ostringstream oss; oss << "Error reading HWM chunk for: " << "OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; hwm-" << fStartingHwm; fLog->logMsg( oss.str(), ERR_COMP_READ_BLOCK, MSGLVL_ERROR ); return ERR_COMP_READ_BLOCK; } // Uncompress the chunk into our 4MB buffer unsigned int outLen = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; int rc = fCompressor->uncompressBlock( compressedOutBuf, fChunkPtrs[chunkIndex].second, fToBeCompressedBuffer, outLen); if (rc) { WErrorCodes ec; std::ostringstream oss; oss << "Error uncompressing HWM chunk for: " << "OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; hwm-" << fStartingHwm << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return ERR_COMP_UNCOMPRESS; } fToBeCompressedCapacity = outLen; // Positition ourselves to start adding data to the HWM block fNumBytes = blockOffsetWithinChunk * BYTE_PER_BLOCK; // We are going to add data to, and thus re-add, the last chunk; so we // drop it from our list. fChunkPtrs.resize( fChunkPtrs.size()-1 ); } else // We have left the HWM chunk; just position file offset, // without reading anything { // If it's not a new buffer, we need to initialize, since we won't be // reading in anything to overlay what's in the to-be-compressed buffer. if (!bNewBuffer) { BlockOp::setEmptyBuf( fToBeCompressedBuffer, IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); } if (fLog->isDebug( DEBUG_2 )) { std::ostringstream oss; oss << "Initializing new empty chunk: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; hwm-" << fStartingHwm; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; // Set file offset to start after last current chunk startFileOffset = IDBCompressInterface::HDR_BUF_LEN*2; if (fChunkPtrs.size() > 0) startFileOffset = fChunkPtrs[ fChunkPtrs.size()-1 ].first + fChunkPtrs[ fChunkPtrs.size()-1 ].second; // Positition ourselves to start of empty to-be-compressed buffer fNumBytes = 0; } return NO_ERROR; }
//------------------------------------------------------------------------------ // Final flushing of data and headers prior to closing the file. // File is also truncated if applicable. //------------------------------------------------------------------------------ int ColumnBufferCompressed::finishFile(bool bTruncFile) { // If capacity is 0, we never got far enough to read in the HWM chunk for // the current column segment file, so no need to update the file contents. // But we do continue in case we need to truncate the file before exiting. // This could happen if our initial block skipping finished an extent. if (fToBeCompressedCapacity > 0) { //char resp; //std::cout << "dbg: before finishFile->compressAndFlush" << std::endl; //std::cin >> resp; // Write out any data still waiting to be compressed RETURN_ON_ERROR( compressAndFlush( true ) ); //std::cout << "dbg: after finishFile->compressAndFlush" << std::endl; //std::cin >> resp; } #ifdef PROFILE Stats::startParseEvent(WE_STATS_COMPRESS_COL_FINISH_EXTENT); #endif // Truncate file (if applicable) based on offset and size of last chunk if (bTruncFile && (fChunkPtrs.size() > 0)) { long long truncateFileSize = fChunkPtrs[fChunkPtrs.size()-1].first + fChunkPtrs[fChunkPtrs.size()-1].second; std::ostringstream oss1; oss1 << "Truncating column file" ": OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; size-" << truncateFileSize; fLog->logMsg( oss1.str(), MSGLVL_INFO2 ); int rc = NO_ERROR; if (truncateFileSize > 0) rc = fColInfo->colOp->truncateFile( fFile, truncateFileSize ); else rc = ERR_COMP_TRUNCATE_ZERO; //@bug 3913 - Catch truncate to 0 bytes if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss2; oss2 << "finishFile: error truncating file for " << "OID " << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; size-" << truncateFileSize << "; " << ec.errorString(rc); fLog->logMsg( oss2.str(), rc, MSGLVL_ERROR ); return rc; } } // Nothing more to do if we are not updating the file contents. if (fToBeCompressedCapacity == 0) { #ifdef PROFILE Stats::stopParseEvent(WE_STATS_COMPRESS_COL_FINISH_EXTENT); #endif return NO_ERROR; } fToBeCompressedCapacity = 0; fNumBytes = 0; fChunkPtrs.clear(); #ifdef PROFILE Stats::stopParseEvent(WE_STATS_COMPRESS_COL_FINISH_EXTENT); #endif return NO_ERROR; }
//------------------------------------------------------------------------------ // Intercept data being copied from the raw-data output buffer to the output // file, and instead buffer up the data to be compressed in 4M chunks before // writing it out. //------------------------------------------------------------------------------ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize) { if (writeSize == 0) // skip unnecessary write, if 0 bytes given return NO_ERROR; // If we are starting a new file, we need to reinit the buffer and // find out what our file offset should be set to. if (!fToBeCompressedCapacity) { #ifdef PROFILE Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_BUF); #endif long long startFileOffset; int rc = initToBeCompressedBuffer( startFileOffset ); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFile: error initializing to-be-compressed buffer " "for OID " << fColInfo->curCol.dataFile.fid << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } rc = fColInfo->colOp->setFileOffset(fFile, startFileOffset, SEEK_SET); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFile: error init compressed file offset for " << "OID " << fColInfo->curCol.dataFile.fid << "; " << startFileOffset << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } #ifdef PROFILE Stats::stopParseEvent(WE_STATS_COMPRESS_COL_INIT_BUF); #endif } unsigned char* bufOffset = fToBeCompressedBuffer + fNumBytes; // Expand the compression buffer size if working with an abbrev extent, and // the bytes we are about to add will overflow the abbreviated extent. if((fToBeCompressedCapacity<IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) && ((fNumBytes + writeSize) > fToBeCompressedCapacity) ) { std::ostringstream oss; oss << "Expanding abbrev to-be-compressed buffer for: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; } if ((fNumBytes + writeSize) <= fToBeCompressedCapacity) { if (fLog->isDebug( DEBUG_2 )) { std::ostringstream oss; oss << "Buffering data to-be-compressed for: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; addBytes-" << writeSize << "; totBytes-" << (fNumBytes+writeSize); fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } memcpy(bufOffset, (fBuffer + startOffset), writeSize); fNumBytes += writeSize; } else // Not enough room to add all the data to the to-be-compressed buffer { int startOffsetX = startOffset; int writeSizeX = writeSize; // The number of bytes (in fBuffer) to be written, could be larger than // our to-be-compressed buffer, so we require a loop to potentially // iterate thru all the bytes to be compresssed and written from fBuffer while (writeSizeX > 0) { idbassert( (fNumBytes <= fToBeCompressedCapacity) ); // DMC-temp debug size_t writeSizeOut = 0; if ((fNumBytes + writeSizeX) > fToBeCompressedCapacity) { writeSizeOut = fToBeCompressedCapacity - fNumBytes; if (fLog->isDebug( DEBUG_2 )) { std::ostringstream oss; oss << "Buffering data (full) to-be-compressed for: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; addBytes-" << writeSizeOut << "; totBytes-" << (fNumBytes + writeSizeOut); fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } if (writeSizeOut > 0) { memcpy(bufOffset, (fBuffer + startOffsetX), writeSizeOut); fNumBytes += writeSizeOut; } //char resp; //std::cout << "dbg: before writeToFile->compressAndFlush" << // std::endl; //std::cin >> resp; int rc = compressAndFlush( false ); //std::cout << "dbg: after writeToFile->compressAndFlush" << // std::endl; //std::cin >> resp; if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFile: error compressing and writing chunk " "for OID " << fColInfo->curCol.dataFile.fid << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } // Start over again loading a new to-be-compressed buffer BlockOp::setEmptyBuf( fToBeCompressedBuffer, IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; bufOffset = fToBeCompressedBuffer; fNumBytes = 0; } else { writeSizeOut = writeSizeX; if (fLog->isDebug( DEBUG_2 )) { std::ostringstream oss; oss << "Buffering data (new) to-be-compressed for: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; addBytes-" << writeSizeOut << "; totBytes-" << (fNumBytes + writeSizeOut); fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } memcpy(bufOffset, (fBuffer + startOffsetX), writeSizeOut); fNumBytes += writeSizeOut; } startOffsetX += writeSizeOut; writeSizeX -= writeSizeOut; } // end of while loop } return NO_ERROR; }
//------------------------------------------------------------------------------ // Fill out existing partial extent to extent boundary, so that we can resume // inserting rows on an extent boundary basis. This use case should only take // place when a DBRoot with a partial extent has been moved from one PM to // another. //------------------------------------------------------------------------------ int ColumnInfoCompressed::extendColumnOldExtent( uint16_t dbRootNext, uint32_t partitionNext, uint16_t segmentNext, HWM hwmNextIn ) { const unsigned int BLKS_PER_EXTENT = (fRowsPerExtent * column.width)/BYTE_PER_BLOCK; // Round up HWM to the end of the current extent unsigned int nBlks = hwmNextIn + 1; unsigned int nRem = nBlks % BLKS_PER_EXTENT; HWM hwmNext = 0; if (nRem > 0) hwmNext = nBlks - nRem + BLKS_PER_EXTENT - 1; else hwmNext = nBlks - 1; std::ostringstream oss; oss << "Padding compressed partial extent to extent boundary in OID-" << curCol.dataFile.fid << "; DBRoot-" << dbRootNext << "; part-" << partitionNext << "; seg-" << segmentNext << "; hwm-" << hwmNext; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); curCol.dataFile.pFile = 0; curCol.dataFile.fDbRoot = dbRootNext; curCol.dataFile.fPartition = partitionNext; curCol.dataFile.fSegment = segmentNext; curCol.dataFile.hwm = hwmNext; curCol.dataFile.fSegFileName.clear(); std::string segFileName; std::string errTask; int rc = colOp->fillCompColumnExtentEmptyChunks( curCol.dataFile.fid, curCol.colWidth, column.emptyVal, curCol.dataFile.fDbRoot, curCol.dataFile.fPartition, curCol.dataFile.fSegment, curCol.dataFile.hwm, segFileName, errTask); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "extendColumnOldExtent: error padding extent (" << errTask << "); " << "column OID-" << curCol.dataFile.fid << "; DBRoot-" << curCol.dataFile.fDbRoot << "; part-" << curCol.dataFile.fPartition << "; seg-" << curCol.dataFile.fSegment << "; newHwm-" << curCol.dataFile.hwm << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL ); fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str()); return rc; } addToSegFileList( curCol.dataFile, hwmNext ); return NO_ERROR; }
//------------------------------------------------------------------------------ // Truncate specified dictionary store file for this column. // Only applies to compressed columns. // // This function may logically belong in a dictionary related class, but I did // not particularly want to put a bulk import specific function in Dctnry- // Compress1 (a wrapper class shared with DML/DDL) or Dctnry, so I put it here. // May change my mind later. // // dmc-Not the most efficient implementation. We are reopening // the file to perform the truncation, instead of truncating the file before // we close it. This is done because we need to first flush the compressed // chunks before we can determine the truncation file size. But the Chunk- // Manager flushChunks() function immediately closes the file and clears itself // after if flushes the data. So by the time we get back to the application // code it's too late to truncate the file. At some point, we could look at // adding or changing the ChunkManager API to support a flush w/o a close. // That would be more optimum than having to reopen the file for truncation. //------------------------------------------------------------------------------ int ColumnInfoCompressed::truncateDctnryStore( OID dctnryOid, uint16_t root, uint32_t pNum, uint16_t sNum) const { int rc = NO_ERROR; // @bug5769 Don't initialize extents or truncate db files on HDFS if (erydbdatafile::ERYDBPolicy::useHdfs()) { std::ostringstream oss1; oss1 << "Finished writing dictionary file" ": OID-" << dctnryOid << "; DBRoot-" << root << "; part-" << pNum << "; seg-" << sNum; // Have to rework this logging if we want to keep it. // Filesize is not correct when adding data to an "existing" file, // since in the case of HDFS, we are writing to a *.cdf.tmp file. //char dctnryFileName[FILE_NAME_SIZE]; //if (colOp->getFileName(dctnryOid,dctnryFileName, // root, pNum, sNum) == NO_ERROR) //{ // off64_t dctnryFileSize = erydbdatafile::ERYDBFileSystem::getFs( // ERYDBDataFile::HDFS).size(dctnryFileName); // if (dctnryFileSize != -1) // { // oss1 << "; size-" << dctnryFileSize; // } //} fLog->logMsg( oss1.str(), MSGLVL_INFO2 ); } else { // See if the relevant dictionary store file can/should be truncated // (to the nearest extent) std::string segFile; ERYDBDataFile* dFile = fTruncateDctnryFileOp.openFile(dctnryOid, root, pNum, sNum, segFile); if (dFile == 0) { rc = ERR_FILE_OPEN; std::ostringstream oss; oss << "Error opening compressed dictionary store segment " "file for truncation" << ": OID-" << dctnryOid << "; DbRoot-" << root << "; partition-" << pNum << "; segment-" << sNum; fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } char controlHdr[ ERYDBCompressInterface::HDR_BUF_LEN ]; rc = fTruncateDctnryFileOp.readFile( dFile, (unsigned char*)controlHdr, ERYDBCompressInterface::HDR_BUF_LEN); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error reading compressed dictionary store control hdr " "for truncation" << ": OID-" << dctnryOid << "; DbRoot-" << root << "; partition-" << pNum << "; segment-" << sNum << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); fTruncateDctnryFileOp.closeFile( dFile ); return rc; } ERYDBCompressInterface compressor; int rc1 = compressor.verifyHdr( controlHdr ); if (rc1 != 0) { rc = ERR_COMP_VERIFY_HDRS; WErrorCodes ec; std::ostringstream oss; oss << "Error verifying compressed dictionary store ptr hdr " "for truncation" << ": OID-" << dctnryOid << "; DbRoot-" << root << "; partition-" << pNum << "; segment-" << sNum << "; (" << rc1 << ")"; fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); fTruncateDctnryFileOp.closeFile( dFile ); return rc; } // No need to perform file truncation if the dictionary file just contains // a single abbreviated extent. Truncating up to the nearest extent would // actually grow the file (something we don't want to do), because we have // not yet reserved a full extent (on disk) for this dictionary store file. const int PSEUDO_COL_WIDTH = 8; uint64_t numBlocks = compressor.getBlockCount( controlHdr ); if ( numBlocks == uint64_t (INITIAL_EXTENT_ROWS_TO_DISK*PSEUDO_COL_WIDTH/BYTE_PER_BLOCK) ) { std::ostringstream oss1; oss1 << "Skip truncating abbreviated dictionary file" ": OID-" << dctnryOid << "; DBRoot-" << root << "; part-" << pNum << "; seg-" << sNum << "; blocks-" << numBlocks; fLog->logMsg( oss1.str(), MSGLVL_INFO2 ); fTruncateDctnryFileOp.closeFile( dFile ); return NO_ERROR; } uint64_t hdrSize = compressor.getHdrSize(controlHdr); uint64_t ptrHdrSize = hdrSize - ERYDBCompressInterface::HDR_BUF_LEN; char* pointerHdr = new char[ptrHdrSize]; rc = fTruncateDctnryFileOp.readFile(dFile, (unsigned char*)pointerHdr, ptrHdrSize); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error reading compressed dictionary store pointer hdr " "for truncation" << ": OID-" << dctnryOid << "; DbRoot-" << root << "; partition-" << pNum << "; segment-" << sNum << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); fTruncateDctnryFileOp.closeFile( dFile ); return rc; } CompChunkPtrList chunkPtrs; rc1 = compressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs ); delete[] pointerHdr; if (rc1 != 0) { rc = ERR_COMP_PARSE_HDRS; WErrorCodes ec; std::ostringstream oss; oss << "Error parsing compressed dictionary store ptr hdr " "for truncation" << ": OID-" << dctnryOid << "; DbRoot-" << root << "; partition-" << pNum << "; segment-" << sNum << "; (" << rc1 << ")"; fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); fTruncateDctnryFileOp.closeFile( dFile ); return rc; } // Truncate the relevant dictionary store file to the nearest extent if (chunkPtrs.size() > 0) { long long dataByteLength = chunkPtrs[chunkPtrs.size()-1].first + chunkPtrs[chunkPtrs.size()-1].second - hdrSize; long long extentBytes = fRowsPerExtent * PSEUDO_COL_WIDTH; long long rem = dataByteLength % extentBytes; if (rem > 0) { dataByteLength = dataByteLength - rem + extentBytes; } long long truncateFileSize = dataByteLength + hdrSize; std::ostringstream oss1; oss1 << "Truncating dictionary file" ": OID-" << dctnryOid << "; DBRoot-" << root << "; part-" << pNum << "; seg-" << sNum << "; size-" << truncateFileSize; fLog->logMsg( oss1.str(), MSGLVL_INFO2 ); if (truncateFileSize > 0) rc = fTruncateDctnryFileOp.truncateFile(dFile,truncateFileSize); else rc = ERR_COMP_TRUNCATE_ZERO;//@bug3913-Catch truncate to 0 bytes if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error truncating compressed dictionary store file" ": OID-" << dctnryOid << "; DbRoot-" << root << "; partition-" << pNum << "; segment-" << sNum << "; " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); fTruncateDctnryFileOp.closeFile( dFile ); return rc; } } fTruncateDctnryFileOp.closeFile( dFile ); } return NO_ERROR; }
void SystemCatalog::build() { TxnID txnID = 0; int rc; //int t= 1000; remove(); cout << "Creating System Catalog..." << endl; cout << endl; // SYSTABLE timeval startTime; gettimeofday( &startTime, 0); ostringstream msg; WErrorCodes ec; //------------------------------------------------------------------------------ // Get the DBRoot count, and rotate the tables through those DBRoots. // All the columns in the first table (SYSTABLE) start on DBRoot1, all the // columns in the second table (SYSCOLUMN) start on DBRoot2, etc. //------------------------------------------------------------------------------ config::Config* cf = config::Config::makeConfig(); string root = cf->getConfig("SystemConfig","DBRootCount"); uint32_t dbRootCount = cf->uFromText(root); //------------------------------------------------------------------------------ // Create SYSTABLE table //------------------------------------------------------------------------------ uint32_t dbRoot = 1; int compressionType = 0; uint32_t partition = 0; uint16_t segment=0; ResourceManager rm; std::map<uint32_t,uint32_t> oids; if( rm.useHdfs() ) { compressionType = 2; oids[OID_SYSTABLE_TABLENAME] = OID_SYSTABLE_TABLENAME; oids[DICTOID_SYSTABLE_TABLENAME] = DICTOID_SYSTABLE_TABLENAME; oids[OID_SYSTABLE_SCHEMA] = OID_SYSTABLE_SCHEMA; oids[DICTOID_SYSTABLE_SCHEMA] = DICTOID_SYSTABLE_SCHEMA; oids[OID_SYSTABLE_OBJECTID] = OID_SYSTABLE_OBJECTID; oids[OID_SYSTABLE_CREATEDATE] = OID_SYSTABLE_CREATEDATE; oids[OID_SYSTABLE_LASTUPDATE] = OID_SYSTABLE_LASTUPDATE; oids[OID_SYSTABLE_INIT] = OID_SYSTABLE_INIT; oids[OID_SYSTABLE_NEXT] = OID_SYSTABLE_NEXT; oids[OID_SYSTABLE_NUMOFROWS] = OID_SYSTABLE_NUMOFROWS; oids[OID_SYSTABLE_AVGROWLEN] = OID_SYSTABLE_AVGROWLEN; oids[OID_SYSTABLE_NUMOFBLOCKS] = OID_SYSTABLE_NUMOFBLOCKS; oids[OID_SYSTABLE_AUTOINCREMENT] = OID_SYSTABLE_AUTOINCREMENT; } fWriteEngine.setTransId(1); fWriteEngine.setBulkFlag(true); cout << "Creating SYSTABLE" << endl; cout << "---------------------------------------" << endl; // TableName msg << " Creating TableName column OID: "<< OID_SYSTABLE_TABLENAME; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_TABLENAME, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); msg << " Creating TableName column dictionary"; //Dictionary files cout << msg.str() << endl; rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSTABLE_TABLENAME, 65, dbRoot, partition, segment, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // Schema msg << " Creating Schema column OID: "<<OID_SYSTABLE_SCHEMA; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_SCHEMA, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(" Creating Schema column dictionary"); cout << msg.str() << endl; //Dictionary files rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSTABLE_SCHEMA, 65, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // ObjectId msg << " Creating ObjectId column OID: " <<OID_SYSTABLE_OBJECTID; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_OBJECTID, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // CreateDate msg << " Creating CreateDate column OID: "<<OID_SYSTABLE_CREATEDATE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_CREATEDATE, erydbSystemCatalog::DATE, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // LastUpdateDate msg << " Creating LastUpdate column OID: "<<OID_SYSTABLE_LASTUPDATE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_LASTUPDATE, erydbSystemCatalog::DATE, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // INIT msg << " Creating INIT column OID: "<<OID_SYSTABLE_INIT; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_INIT, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // NEXT msg << " Creating NEXT column OID: "<<OID_SYSTABLE_NEXT; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_NEXT, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); //NUMOFROWS msg << " Creating NUMOFROWS column OID: "<<OID_SYSTABLE_NUMOFROWS; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_NUMOFROWS, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); //AVGROWLEN msg << " Creating AVGROWLEN column OID: "<<OID_SYSTABLE_AVGROWLEN; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_AVGROWLEN, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); //NUMOFBLOCKS msg << " Creating NUMOFBLOCKS column OID: "<<OID_SYSTABLE_NUMOFBLOCKS; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_NUMOFBLOCKS, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); //AUTOINCREMENT msg << " Creating AUTOINCREMENT column OID: "<<OID_SYSTABLE_AUTOINCREMENT; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSTABLE_AUTOINCREMENT, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); //------------------------------------------------------------------------------ // Create SYSCOLUMN table //------------------------------------------------------------------------------ //dbRoot++; //if (dbRoot > dbRootCount) // dbRoot = 1; //SYSCOLUMN if( rm.useHdfs() ) { oids[OID_SYSCOLUMN_SCHEMA] = OID_SYSCOLUMN_SCHEMA; oids[DICTOID_SYSCOLUMN_SCHEMA] = DICTOID_SYSCOLUMN_SCHEMA; oids[OID_SYSCOLUMN_TABLENAME] = OID_SYSCOLUMN_TABLENAME; oids[DICTOID_SYSCOLUMN_TABLENAME] = DICTOID_SYSCOLUMN_TABLENAME; oids[OID_SYSCOLUMN_COLNAME] = OID_SYSCOLUMN_COLNAME; oids[DICTOID_SYSCOLUMN_COLNAME] = DICTOID_SYSCOLUMN_COLNAME; oids[OID_SYSCOLUMN_OBJECTID] = OID_SYSCOLUMN_OBJECTID; oids[OID_SYSCOLUMN_DICTOID] = OID_SYSCOLUMN_DICTOID; oids[OID_SYSCOLUMN_LISTOBJID] = OID_SYSCOLUMN_LISTOBJID; oids[OID_SYSCOLUMN_TREEOBJID] = OID_SYSCOLUMN_TREEOBJID; oids[OID_SYSCOLUMN_DATATYPE] = OID_SYSCOLUMN_DATATYPE; oids[OID_SYSCOLUMN_COLUMNLEN] = OID_SYSCOLUMN_COLUMNLEN; oids[OID_SYSCOLUMN_COLUMNPOS] = OID_SYSCOLUMN_COLUMNPOS; oids[OID_SYSCOLUMN_LASTUPDATE] = OID_SYSCOLUMN_LASTUPDATE; oids[OID_SYSCOLUMN_DEFAULTVAL] = OID_SYSCOLUMN_DEFAULTVAL; oids[DICTOID_SYSCOLUMN_DEFAULTVAL] = DICTOID_SYSCOLUMN_DEFAULTVAL; oids[OID_SYSCOLUMN_NULLABLE] = OID_SYSCOLUMN_NULLABLE; oids[OID_SYSCOLUMN_SCALE] = OID_SYSCOLUMN_SCALE; oids[OID_SYSCOLUMN_PRECISION] = OID_SYSCOLUMN_PRECISION; oids[OID_SYSCOLUMN_AUTOINC] = OID_SYSCOLUMN_AUTOINC; oids[OID_SYSCOLUMN_DISTCOUNT] = OID_SYSCOLUMN_DISTCOUNT; oids[OID_SYSCOLUMN_NULLCOUNT] = OID_SYSCOLUMN_NULLCOUNT; oids[OID_SYSCOLUMN_MINVALUE] = OID_SYSCOLUMN_MINVALUE; oids[DICTOID_SYSCOLUMN_MINVALUE] = DICTOID_SYSCOLUMN_MINVALUE; oids[OID_SYSCOLUMN_MAXVALUE] = OID_SYSCOLUMN_MAXVALUE; oids[DICTOID_SYSCOLUMN_MAXVALUE] = DICTOID_SYSCOLUMN_MAXVALUE; oids[OID_SYSCOLUMN_COMPRESSIONTYPE] = OID_SYSCOLUMN_COMPRESSIONTYPE; oids[OID_SYSCOLUMN_NEXTVALUE] = OID_SYSCOLUMN_NEXTVALUE; } cout<< endl; cout << "Creating SYSCOLUMN" << endl; // Schema cout << "---------------------------------------" << endl; msg << " Creating Schema column OID: "<<OID_SYSCOLUMN_SCHEMA; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_SCHEMA, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(" Creating Schema column dictionary..."); //Dictionary files cout << msg.str() << endl; rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSCOLUMN_SCHEMA, 65, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // TableName msg << " Creating TableName column OID: "<<OID_SYSCOLUMN_TABLENAME; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_TABLENAME, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(" Creating TableName column dictionary..."); //Dictionary files cout << msg.str() << endl; rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSCOLUMN_TABLENAME, 65, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // ColumnName msg << " Creating ColumnName column OID: "<<OID_SYSCOLUMN_COLNAME; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_COLNAME, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(" Creating ColumnName column dictionary..."); //Dictionary files cout << msg.str() << endl; rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSCOLUMN_COLNAME, 65, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // ObjectID msg << " Creating ObjectID column OID: "<<OID_SYSCOLUMN_OBJECTID; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_OBJECTID, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // DictOID msg << " Creating DictOID column OID: "<<OID_SYSCOLUMN_DICTOID; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_DICTOID, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // ListOID msg << " Creating ListOID column OID: "<< OID_SYSCOLUMN_LISTOBJID; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_LISTOBJID, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // TreeOID msg << " Creating TreeOID column OID: "<< OID_SYSCOLUMN_TREEOBJID; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_TREEOBJID, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // DataType msg << " Creating DataType column OID: "<< OID_SYSCOLUMN_DATATYPE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_DATATYPE, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // ColumnLength msg << " Creating ColumnLength column OID: "<< OID_SYSCOLUMN_COLUMNLEN; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_COLUMNLEN, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // ColumnPos msg << " Creating ColumnPos column OID: "<<OID_SYSCOLUMN_COLUMNPOS; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_COLUMNPOS, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // LastUpdate msg << " Creating LastUpdate column OID: "<< OID_SYSCOLUMN_LASTUPDATE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_LASTUPDATE, erydbSystemCatalog::DATE, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // DefaultValue msg << " Creating DefaultValue column OID: "<< OID_SYSCOLUMN_DEFAULTVAL; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_DEFAULTVAL, erydbSystemCatalog::VARCHAR, 8, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); msg.str(" Creating DefaultValue column dictionary..."); //Dictionary files cout << msg.str() << endl; rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSCOLUMN_DEFAULTVAL, 9, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // Nullable msg << " Creating Nullable column OID: "<<OID_SYSCOLUMN_NULLABLE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_NULLABLE, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // Scale msg << " Creating Scale column OID: "<<OID_SYSCOLUMN_SCALE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_SCALE, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // Precision msg << " Creating Precision column OID: "<<OID_SYSCOLUMN_PRECISION; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_PRECISION, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // AutoInc msg << " Creating AutoInc column OID: "<<OID_SYSCOLUMN_AUTOINC; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_AUTOINC, erydbSystemCatalog::CHAR, 1, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // DISTCOUNT msg << " Creating DISTCOUNT column OID: "<<OID_SYSCOLUMN_DISTCOUNT; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_DISTCOUNT, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // NULLCOUNT msg << " Creating NULLCOUNT column OID: "<<OID_SYSCOLUMN_NULLCOUNT; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_NULLCOUNT, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // MINVALUE msg << " Creating MINVALUE column OID: "<<OID_SYSCOLUMN_MINVALUE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_MINVALUE, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(" Creating MINVALUE column dictionary..."); cout << msg.str() << endl; //Dictionary files rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSCOLUMN_MINVALUE, 65, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // MAXVALUE msg << " Creating MAXVALUE column OID: "<<OID_SYSCOLUMN_MAXVALUE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_MAXVALUE, erydbSystemCatalog::VARCHAR, 40, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(" Creating MAXVALUE column dictionary..."); //Dictionary files cout << msg.str() << endl; rc = fWriteEngine.createDctnry(txnID, DICTOID_SYSCOLUMN_MAXVALUE, 65, dbRoot, partition, segment, compressionType); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); // CompressionType msg << " Creating CompressionType column OID: "<<OID_SYSCOLUMN_COMPRESSIONTYPE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_COMPRESSIONTYPE, erydbSystemCatalog::INT, 4, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); // nextvalue msg << " Creating NEXTVALUE column OID: "<<OID_SYSCOLUMN_NEXTVALUE; cout << msg.str() << endl; rc = fWriteEngine.createColumn( txnID, OID_SYSCOLUMN_NEXTVALUE, erydbSystemCatalog::UBIGINT, 8, dbRoot, partition, compressionType ); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); msg.str(""); //------------------------------------------------------------------------------ // Create SYSCONSTRAINT table //------------------------------------------------------------------------------ dbRoot++; if (dbRoot > dbRootCount) dbRoot = 1; //flush data files fWriteEngine.flushDataFiles(rc, 1, oids); // save brm msg.str(" BRMWrapper saving state "); rc = BRMWrapper::getInstance()->saveState(); if (rc) throw runtime_error(msg.str() + ec.errorString(rc)); timeval endTime; gettimeofday( &endTime, 0); double elapsedTime = (endTime.tv_sec + (endTime.tv_usec / 1000000.0)) - (startTime.tv_sec + (startTime.tv_usec / 1000000.0)); cout << "System Catalog creation took: " << elapsedTime << " seconds to complete." << endl; cout << endl; cout << "System Catalog created" << endl; cout << endl; }
//------------------------------------------------------------------------------ // Writes the specified bytes from the internal buffer to the db column file. // The data to be written, starts at "startOffset" in the internal buffer and // is "writeSize" bytes long. // This function also checks to see if an extent needs to be added to the db // column file for this number of bytes. If a second extent is required, // then the current db file will be filled out with the 1st part of the buffer, // and the remaining buffer data will be written to the next segment file in // the DBRoot, partition, segement number sequence. // This function also catches and handles the case where an abbreviated // extent needs to be expanded to a full extent on disk. // // WARNING: This means this function may change the information in the // ColumnInfo struct that owns this ColumnBufferManager, if a // second db column file has to be opened to finish writing the // internal buffer, or if an abbreviated extent is expanded. //------------------------------------------------------------------------------ int ColumnBufferManager::writeToFileExtentCheck( uint32_t startOffset, uint32_t writeSize) { if (fLog->isDebug( DEBUG_3 )) { std::ostringstream oss; oss << "Col extent check: OID-" << fColInfo->curCol.dataFile.fid << "; DBRoot-" << fColInfo->curCol.dataFile.fDbRoot << "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment << "; Wanting to write " << writeSize << " bytes, with avail space " << fColInfo->availFileSize; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } // Don't need a mutex lock here because if writeToFile() is calling // us, we already have a lock; and if flush() is calling us, then // all parsing is complete, so we should have no thread contention. // If extent out of space, see if this is an abbrev extent we can expand long long availableFileSize = fColInfo->availFileSize; if ((availableFileSize < writeSize) && (fColInfo->isAbbrevExtent())) { int rc = fColInfo->expandAbbrevExtent(true); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFileExtentCheck: expand extent failed: " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } availableFileSize = fColInfo->availFileSize; } if (availableFileSize >= writeSize) { int rc = fCBuf->writeToFile(startOffset, writeSize); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFileExtentCheck: write1 extent failed: " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } fColInfo->updateBytesWrittenCounts( writeSize ); } else { // We use ColumnInfo to help us add an extent to the "next" // segment file, if needed. // Current extent does not have enough room for buffer, so we // have to break up the buffer into 2 extents; creating a new // extent and switching the db column file "on-the-fly". int writeSize1 = availableFileSize; if (writeSize1 > 0) { int rc = fCBuf->writeToFile(startOffset, writeSize1); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFileExtentCheck: write2 extent failed: " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } fColInfo->updateBytesWrittenCounts( writeSize1 ); } int rc = fColInfo->extendColumn( true ); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFileExtentCheck: extend column failed: " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } int writeSize2 = writeSize - writeSize1; fCBuf->writeToFile(startOffset+writeSize1, writeSize2); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "writeToFileExtentCheck: write3 extent failed: " << ec.errorString(rc); fLog->logMsg( oss.str(), rc, MSGLVL_ERROR ); return rc; } fColInfo->updateBytesWrittenCounts( writeSize2 ); } return NO_ERROR; }
//------------------------------------------------------------------------------ // Finished working with this auto-increment. Any remaining steps that are // necessary to save or commit changes to the auto-increment nextValue, are // applied here. //------------------------------------------------------------------------------ int ColumnAutoInc::finish( ) { int rc = NO_ERROR; // We intentionally use a separate DBRM instance in this function. We don't // use the BRMWrapper singleton. We do this because the BRM call that is // made to issue a lock is a synchronous call that will block till a lock // is acquired. Better to do this in a separate BRM instance, rather than // having this call block any other thread using BRM. BRM::DBRM dbrm; // We grab AI lock in order to access/synchronize DBRM and the system // catalog as a single operation, to avoid race condition between apps. try { dbrm.getAILock( fColumnOID ); } catch (std::exception& ex) { std::ostringstream oss; oss << "Error locking auto-increment nextValue lock for table " << fTableName << "; column " << fColumnName << "; " << ex.what(); fLog->logMsg( oss.str(), ERR_AUTOINC_GET_LOCK, MSGLVL_ERROR ); BulkLoad::addErrorMsg2BrmUpdater(fTableName, oss); return ERR_AUTOINC_GET_LOCK; } uint64_t sysCatNextAuto = 0; rc = getNextValueFromSysCat( sysCatNextAuto ); if (rc == NO_ERROR) { // Update system catalog if my latest AI nextValue is > the current // syscat AI nextValue. max(uint64_t) denotes an AI column that has maxed out. uint64_t myNextValue = getNextAutoIncToSave(); if ( (sysCatNextAuto != AUTOINCR_SATURATED) && // do not update if syscat already at max ((myNextValue > sysCatNextAuto) || (myNextValue == AUTOINCR_SATURATED)) ) { std::ostringstream oss2; oss2 << "Updating next auto increment for table-" << fTableName << ", column-" << fColumnName << "; autoincrement " << myNextValue; fLog->logMsg( oss2.str(), MSGLVL_INFO2 ); rc = BulkLoad::updateNextValue( fColumnOID, myNextValue ); if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error updating auto-increment nextValue for table " << fTableName << "; column " << fColumnName << "; rc=" << rc << "; " << ec.errorString(ERR_AUTOINC_UPDATE); fLog->logMsg( oss.str(), ERR_AUTOINC_UPDATE, MSGLVL_ERROR ); BulkLoad::addErrorMsg2BrmUpdater(fTableName, oss); // Don't exit this function yet. We set return code and fall // through to bottom of the function to release the AI lock. rc = ERR_AUTOINC_UPDATE; } } else { std::ostringstream oss2; oss2 << "Skip updating next auto increment for table-" << fTableName << ", column-" << fColumnName << "; autoincrement " << myNextValue << "; syscat AI already at " << sysCatNextAuto; fLog->logMsg( oss2.str(), MSGLVL_INFO2 ); } } // end of rc==NO_ERROR from getNextValueFromSysCat() try { dbrm.releaseAILock( fColumnOID ); } catch (std::exception& ex) { // If we have trouble releasing AI lock, we log it, but we don't // consider it fatal to the job; so we don't return bad return code. std::ostringstream oss; oss << "Error releasing auto-increment nextValue lock for table " << fTableName << "; column " << fColumnName << "; " << ex.what(); fLog->logMsg( oss.str(), ERR_AUTOINC_REL_LOCK, MSGLVL_WARNING ); //return ERR_AUTOINC_REL_LOCK; } return rc; }