//------------------------------------------------------------------------------ // Send collection information (Casual Partition and HWM) to applicable // destination. If file name given, then data is saved to the file, else // the data is sent directly to BRM. //------------------------------------------------------------------------------ int BRMReporter::sendBRMInfo(const std::string& rptFileName, const std::vector<std::string>& errFiles, const std::vector<std::string>& badFiles) { int rc = NO_ERROR; //Purge PrimProc FD cache if (( fFileInfo.size() > 0 ) && idbdatafile::IDBPolicy::useHdfs()) { cacheutils::purgePrimProcFdCache(fFileInfo, Config::getLocalModuleID()); } if (rptFileName.empty()) { // Set Casual Partition (CP) info for BRM for this column. Be sure to // do this before we set the HWM. Updating HWM 1st could cause a race // condition resulting in a query being based on temporary outdated CP // info. rc = sendHWMandCPToBRM( ); // If HWM error occurs, we fail the job. if (rc != NO_ERROR) { return rc; } } else { fRptFileName = rptFileName; rc = openRptFile( ); if (rc != NO_ERROR) { return rc; } sendCPToFile ( ); sendHWMToFile( ); // Log the list of *.err and *.bad files for (unsigned k=0; k<errFiles.size(); k++) { fRptFile << "ERR: " << errFiles[k] << std::endl; } for (unsigned k=0; k<badFiles.size(); k++) { fRptFile << "BAD: " << badFiles[k] << std::endl; } } return rc; }
//------------------------------------------------------------------------------ // Send collection information (Casual Partition and HWM) to applicable // destination. If file name given, then data is saved to the file, else // the data is sent directly to BRM. // // On HDFS system, this function also notifies PrimProc to flush certain file // descriptors (for columns and dictionary store), and blocks (for dictionary // store). Any DB file changes should have been "confirmed" prior to calling // sendBRMInfo(). Once PrimProc cache is flushed, we can send the BRM updates. //------------------------------------------------------------------------------ int BRMReporter::sendBRMInfo(const std::string& rptFileName, const std::vector<std::string>& errFiles, const std::vector<std::string>& badFiles) { int rc = NO_ERROR; // For HDFS, we need to flush PrimProc cache since we modify HDFS files // by rewriting the files. if (idbdatafile::IDBPolicy::useHdfs()) { std::vector<BRM::FileInfo> allFileInfo; if ( fFileInfo.size() > 0 ) { for (unsigned k=0; k<fFileInfo.size(); k++) { allFileInfo.push_back( fFileInfo[k] ); } } std::vector<BRM::OID_t> oidsToFlush; std::set<BRM::OID_t> oidSet; if (fDctnryFileInfo.size() > 0) { for (unsigned k=0; k<fDctnryFileInfo.size(); k++) { allFileInfo.push_back( fDctnryFileInfo[k] ); oidSet.insert( fDctnryFileInfo[k].oid ); } // Store dictionary oids in std::set first, to eliminate duplicates if (oidSet.size() > 0) { for (std::set<BRM::OID_t>::const_iterator iter=oidSet.begin(); iter != oidSet.end(); ++iter) { oidsToFlush.push_back( *iter ); } } } // Flush PrimProc FD cache if (allFileInfo.size() > 0) { cacheutils::purgePrimProcFdCache(allFileInfo, Config::getLocalModuleID()); } // Flush PrimProc block cache if (oidsToFlush.size() > 0) cacheutils::flushOIDsFromCache(oidsToFlush); } // After flushing cache (for HDFS), now we can update BRM if (rptFileName.empty()) { // Set Casual Partition (CP) info for BRM for this column. Be sure to // do this before we set the HWM. Updating HWM 1st could cause a race // condition resulting in a query being based on temporary outdated CP // info. rc = sendHWMandCPToBRM( ); // If HWM error occurs, we fail the job. if (rc != NO_ERROR) { return rc; } } else { fRptFileName = rptFileName; rc = openRptFile( ); if (rc != NO_ERROR) { return rc; } sendCPToFile ( ); sendHWMToFile( ); // Log the list of *.err and *.bad files for (unsigned k=0; k<errFiles.size(); k++) { fRptFile << "ERR: " << errFiles[k] << std::endl; } for (unsigned k=0; k<badFiles.size(); k++) { fRptFile << "BAD: " << badFiles[k] << std::endl; } } return rc; }