void ovStore::ovStore_write(void) { AS_UTL_mkdir(_storePath); char name[FILENAME_MAX]; sprintf(name, "%s/info", _storePath); // If the ovs file exists, AND has a valid magic number, then the store is complete and we should // abort before the valid store is destroyed. if (AS_UTL_fileExists(name, false, false)) { errno = 0; FILE *ovsinfo = fopen(name, "r"); if (errno) { fprintf(stderr, "ERROR: failed to read store metadata from '%s': %s\n", name, strerror(errno)); exit(1); } AS_UTL_safeRead(ovsinfo, &_info, "ovStore::ovStore::testinfo", sizeof(ovStoreInfo), 1); fclose(ovsinfo); if (_info._ovsMagic == ovStoreMagic) fprintf(stderr, "ERROR: overlapStore '%s' is a valid overlap store, will not overwrite.\n", _storePath), exit(1); } // Create a new incomplete info file. errno = 0; FILE *ovsinfo = fopen(name, "w"); if (errno) fprintf(stderr, "failed to create overlap store '%s': %s\n", _storePath, strerror(errno)), exit(1); AS_UTL_safeWrite(ovsinfo, &_info, "ovStore::ovStore::saveinfo", sizeof(ovStoreInfo), 1); fclose(ovsinfo); sprintf(name, "%s/index", _storePath); errno = 0; _offtFile = fopen(name, "w"); if (errno) fprintf(stderr, "AS_OVS_createOverlapStore()-- failed to open offset file '%s': %s\n", name, strerror(errno)), exit(1); _overlapsThisFile = 0; _currentFileIndex = 0; _bof = NULL; }
bool FragmentInfo::load(const char *prefix) { char name[FILENAME_MAX]; sprintf(name, "%s.fragmentInfo", prefix); errno = 0; FILE *file = fopen(name, "r"); if (errno) return(false); uint64 magicNumber = 0; uint64 versionNumber = 0; AS_UTL_safeRead(file, &magicNumber, "fragmentInformationMagicNumber", sizeof(uint64), 1); AS_UTL_safeRead(file, &versionNumber, "fragmentInformationVersionNumber", sizeof(uint64), 1); AS_UTL_safeRead(file, &_numFragments, "fragmentInformationNumFrgs", sizeof(uint32), 1); AS_UTL_safeRead(file, &_numLibraries, "fragmentInformationNumLibs", sizeof(uint32), 1); if (magicNumber != fiMagicNumber) { writeLog("FragmentInfo()-- File '%s' is not a fragment info; cannot load.\n", name); fclose(file); return(false); } if (versionNumber != fiVersionNumber) { writeLog("FragmentInfo()-- File '%s' is version "F_U64", I can only read version "F_U64"; cannot load.\n", name, versionNumber, fiVersionNumber); fclose(file); return(false); } writeLog("FragmentInfo()-- Loading fragment information for "F_U32" fragments and "F_U32" libraries from cache '%s'\n", _numFragments, _numLibraries, name); _fragLength = new uint32 [_numFragments + 1]; _libIID = new uint32 [_numFragments + 1]; _numFragsInLib = new uint32 [_numLibraries + 1]; AS_UTL_safeRead(file, _fragLength, "fragmentInformationFragLen", sizeof(uint32), _numFragments + 1); AS_UTL_safeRead(file, _libIID, "fragmentInformationLibIID", sizeof(uint32), _numFragments + 1); AS_UTL_safeRead(file, _numFragsInLib, "fragmentInformationNumFrgsInLib", sizeof(uint32), _numLibraries + 1); fclose(file); return(true); }
//  Restrict subsequent reads to overlaps whose a_iid lies in [firstIID, lastIID].
//
//  The index holds one record per read IID, so the record for firstIID is found by seeking
//  directly to it.  That record says which data file, and where in it, the overlaps begin.
//
void
ovStore::setRange(uint32 firstIID, uint32 lastIID) {

  //  Clamp the request to what the store holds.  A request starting beyond the end becomes
  //  an (invalid) range starting one past the largest IID; readOverlap() copes with
  //  firstIID > lastIID.

  if (firstIID > _info._largestIID)
    firstIID = _info._largestIID + 1;

  if (lastIID >= _info._largestIID)
    lastIID = _info._largestIID;

  //  Remember the request and forget any previous offset record before touching the file.
  //  If the read below fails, these are what readOverlap() will see.

  _offt.clear();

  _firstIIDrequested = firstIID;
  _lastIIDrequested  = lastIID;

  //  Position the index on the first requested record and load it.  We must actually read
  //  the record to learn where the overlap stream starts.

  AS_UTL_fseek(_offtFile, (size_t)firstIID * sizeof(ovStoreOfft), SEEK_SET);

  bool  gotRecord = (0 != AS_UTL_safeRead(_offtFile, &_offt, "ovStore::setRange::offset", sizeof(ovStoreOfft), 1));

  if (gotRecord) {
    char  dataFile[FILENAME_MAX];

    _overlapsThisFile = 0;
    _currentFileIndex = _offt._fileno;

    //  Swap the open data file for the one holding the first requested overlap.

    delete _bof;

    sprintf(dataFile, "%s/%04d", _storePath, _currentFileIndex);

    _bof = new ovFile(dataFile, ovFileNormal);
    _bof->seekOverlap(_offt._offset);
  }
}
//  Read the next overlap in the requested range into 'overlap'.
//
//  Returns 1 if an overlap was loaded, 0 if the index is exhausted, the next overlap is beyond
//  _lastIIDrequested, or there are no more data files to read from.
//
uint32
ovStore::readOverlap(ovOverlap *overlap) {

  assert(_isOutput == FALSE);

  //  If we've finished reading overlaps for the current a_iid, get another a_iid.  If we hit
  //  EOF here, we're all done, no more overlaps.

  while (_offt._numOlaps == 0)
    if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::readOverlap::offset", sizeof(ovStoreOfft), 1))
      return(0);

  //  And if we've exited the range of overlaps requested, return.

  if (_offt._a_iid > _lastIIDrequested)
    return(0);

  while ((_bof == NULL) ||
         (_bof->readOverlap(overlap) == FALSE)) {
    char  name[FILENAME_MAX];

    //  We read no overlap, open the next file and try again.  The pointer is cleared
    //  immediately so the store never holds a dangling _bof.

    delete _bof;
    _bof = NULL;

    _currentFileIndex++;

    //  No more files: stop, as readOverlaps() does, instead of trying to open a file past the
    //  end of the store.

    if (_currentFileIndex > _info._highestFileIndex)
      return(0);

    snprintf(name, FILENAME_MAX, "%s/%04d", _storePath, _currentFileIndex);

    _bof = new ovFile(name, ovFileNormal);
  }

  //  The a_iid comes from the index record, not from the data file.

  overlap->a_iid = _offt._a_iid;
  overlap->g     = _gkp;

  if (_evalues)
    overlap->evalue(_evalues[_offt._overlapID++]);

  _offt._numOlaps--;

  return(1);
}
//  Return a newly allocated array giving the number of overlaps for each read in the requested
//  range; element i is the count for read (_firstIIDrequested + i).  The caller owns the array
//  and must delete [] it.
//
//  firstFrag and lastFrag are set to the requested range.  If the range is invalid
//  (_firstIIDrequested > _lastIIDrequested), NULL is returned and firstFrag/lastFrag are not
//  modified.
//
//  The position of the index file is preserved.  A short read of the index is fatal.
//
uint32 *
ovStore::numOverlapsPerFrag(uint32 &firstFrag, uint32 &lastFrag) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(NULL);

  firstFrag = _firstIIDrequested;
  lastFrag  = _lastIIDrequested;

  size_t originalPosition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (size_t)_firstIIDrequested * sizeof(ovStoreOfft), SEEK_SET);

  //  Even if we're doing a whole human-size store, this allocation is
  //  (a) temporary and (b) only 512MB.  The only current consumer of
  //  this code is FragCorrectOVL.c, which doesn't run on the whole
  //  human, it runs on ~24 pieces, which cuts this down to < 32MB.

  uint64        len     = _lastIIDrequested - _firstIIDrequested + 1;

  ovStoreOfft  *offsets = new ovStoreOfft [len];
  uint32       *numolap = new uint32      [len];

  uint64        act     = AS_UTL_safeRead(_offtFile, offsets, "ovStore::numOverlapsInRange::offsets", sizeof(ovStoreOfft), len);

  if (len != act) {
    fprintf(stderr, "AS_OVS_numOverlapsPerFrag()-- short read on offsets! Expected len=" F_U64 " read act=" F_U64 "\n", len, act);
    exit(1);
  }

  for (uint64 i=0; i<len; i++)
    numolap[i] = offsets[i]._numOlaps;

  delete [] offsets;

  //  Put the index back where the caller had it.

  AS_UTL_fseek(_offtFile, originalPosition, SEEK_SET);

  return(numolap);
}
//  Total number of overlaps for all reads in the currently requested range, or zero if the
//  range is invalid.  The index file position is saved on entry and restored on exit; a short
//  read of the index is fatal.
//
uint64
ovStore::numOverlapsInRange(void) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(0);

  size_t  savedPosition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (size_t)_firstIIDrequested * sizeof(ovStoreOfft), SEEK_SET);

  //  All index records for the range are loaded in one go.  For a whole human-size store
  //  that is a temporary allocation of about 512MB; the only current consumer
  //  (FragCorrectOVL.c) works on ~24 pieces, which brings it below 32MB.

  uint64        nRecords = _lastIIDrequested - _firstIIDrequested + 1;
  ovStoreOfft  *records  = new ovStoreOfft [nRecords];

  if (nRecords != AS_UTL_safeRead(_offtFile, records, "AS_OVS_numOverlapsInRange", sizeof(ovStoreOfft), nRecords)) {
    fprintf(stderr, "AS_OVS_numOverlapsInRange()-- short read on offsets!\n");
    exit(1);
  }

  //  Sum the per-read counts.

  uint64  total = 0;

  for (ovStoreOfft *rec = records; rec != records + nRecords; rec++)
    total += rec->_numOlaps;

  delete [] records;

  AS_UTL_fseek(_offtFile, savedPosition, SEEK_SET);

  return(total);
}
//  Load a batch of overlaps into 'overlaps', which has space for 'maxOverlaps' entries.
//
//  If 'overlaps' is NULL or 'maxOverlaps' is zero, nothing is loaded and the number of overlaps
//  for the current a_iid is returned instead.
//
//  With restrictToIID == true, all overlaps for the current a_iid are loaded.  With
//  restrictToIID == false, overlaps are loaded across a_iids until the buffer is full, the
//  index is exhausted, or the requested range ends.
//
//  Returns the number of overlaps loaded; zero if there is nothing (more) in the range.
//
uint32
ovStore::readOverlaps(ovOverlap *overlaps, uint32 maxOverlaps, bool restrictToIID) {
  uint32  numOvl = 0;

  assert(_isOutput == FALSE);

  //  If we've finished reading overlaps for the current a_iid, get another a_iid.  If we hit
  //  EOF here, we're all done, no more overlaps.

  while (_offt._numOlaps == 0)
    if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::readOverlaps::offset", sizeof(ovStoreOfft), 1))
      return(0);

  //  And if we've exited the range of overlaps requested, return.

  if (_offt._a_iid > _lastIIDrequested)
    return(0);

  //  Just a query?  Return the number of overlaps we'd want to read.

  if ((overlaps == NULL) || (maxOverlaps == 0))
    return(_offt._numOlaps);

  //  Read all the overlaps for this ID.

  assert(_offt._numOlaps <= maxOverlaps);

  while ((_offt._numOlaps > 0) &&
         ((restrictToIID == true) || (numOvl < maxOverlaps))) {

    //  Read an overlap.  If this fails, open the next partition and read from there.

    while ((_bof == NULL) ||
           (_bof->readOverlap(overlaps + numOvl) == false)) {
      char  name[FILENAME_MAX];

      //  We read no overlap, open the next file and try again.  Clear the pointer so that
      //  running out of files does not leave a deleted object in _bof.

      delete _bof;
      _bof = NULL;

      _currentFileIndex++;

      if (_currentFileIndex > _info._highestFileIndex)
        //  No more files, stop trying to load an overlap.
        break;

      snprintf(name, FILENAME_MAX, "%s/%04d", _storePath, _currentFileIndex);

      _bof = new ovFile(name, ovFileNormal);
    }

    //  If the currentFileIndex is invalid, we ran out of overlaps to load.  Don't save that
    //  empty overlap to the list, and stop: with no files left, _numOlaps can never reach
    //  zero and the loop would otherwise not terminate.

    if (_currentFileIndex > _info._highestFileIndex)
      break;

    overlaps[numOvl].a_iid = _offt._a_iid;
    overlaps[numOvl].g     = _gkp;

    if (_evalues)
      overlaps[numOvl].evalue(_evalues[_offt._overlapID++]);

    numOvl++;

    assert(_offt._numOlaps > 0);

    _offt._numOlaps--;

    //  If restrictToIID == false, we're loading all overlaps up to the end of the store, or the
    //  requested last IID.  If to the end of store, we never read a last 'offset' and so a_iid
    //  is still valid (below lastIIDrequested == infinity) but numOlaps is still zero, and the
    //  main loop terminates.

    if (restrictToIID == false) {
      while (_offt._numOlaps == 0)
        if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::readOverlap::offset", sizeof(ovStoreOfft), 1))
          break;

      if (_offt._a_iid > _lastIIDrequested)
        break;
    }
  }  //  while space for more overlaps, load overlaps

  assert(numOvl <= maxOverlaps);

  return(numOvl);
}
void mergeInfoFiles(char *storePath, uint32 nPieces) { ovStoreInfo infopiece; ovStoreInfo info; info._ovsMagic = ovStoreMagic; info._ovsVersion = ovStoreVersion; info._smallestIID = UINT64_MAX; info._largestIID = 0; info._numOverlapsTotal = 0; info._highestFileIndex = nPieces; info._maxReadLenInBits = AS_MAX_READLEN_BITS; ovStoreOfft offm; offm._a_iid = 0; offm._fileno = 1; offm._offset = 0; offm._numOlaps = 0; // Open the new master index output file char name[FILENAME_MAX]; sprintf(name, "%s/index", storePath); errno = 0; FILE *idx = fopen(name, "w"); if (errno) fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1); // Special case, we need an empty index for the zeroth fragment. AS_UTL_safeWrite(idx, &offm, "ovStore::mergeInfoFiles::offsetZero", sizeof(ovStoreOfft), 1); // Process each for (uint32 i=1; i<=nPieces; i++) { sprintf(name, "%s/%04d.info", storePath, i); fprintf(stderr, "Processing '%s'\n", name); if (AS_UTL_fileExists(name, FALSE, FALSE) == false) { fprintf(stderr, "ERROR: file '%s' not found.\n", name); exit(1); } { errno = 0; FILE *F = fopen(name, "r"); if (errno) fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1); AS_UTL_safeRead(F, &infopiece, "ovStore::mergeInfoFiles::infopiece", sizeof(ovStoreInfo), 1); fclose(F); } // Add empty index elements for missing overlaps if (infopiece._numOverlapsTotal == 0) { fprintf(stderr, " No overlaps found.\n"); continue; } assert(infopiece._smallestIID <= infopiece._largestIID); if (info._largestIID + 1 < infopiece._smallestIID) fprintf(stderr, " Adding empty records for fragments "F_U64" to "F_U64"\n", info._largestIID + 1, infopiece._smallestIID - 1); while (info._largestIID + 1 < infopiece._smallestIID) { offm._a_iid = info._largestIID + 1; //offm._fileno = set elsewhere //offm._offset = set elsewhere //offm._numOlaps = 0; AS_UTL_safeWrite(idx, &offm, "ovStore::mergeInfoFiles::offsets", sizeof(ovStoreOfft), 1); info._largestIID++; } // Copy index 
elements for existing overlaps. While copying, update the supposed position // of any fragments with no overlaps. Without doing this, accessing the store beginning // or ending at such a fragment will fail. { sprintf(name, "%s/%04d.index", storePath, i); errno = 0; FILE *F = fopen(name, "r"); if (errno) fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1); uint32 recsLen = 0; uint32 recsMax = 1024 * 1024; ovStoreOfft *recs = new ovStoreOfft [recsMax]; recsLen = AS_UTL_safeRead(F, recs, "ovStore::mergeInfoFiles::offsetsLoad", sizeof(ovStoreOfft), recsMax); if (recsLen > 0) { if (info._largestIID + 1 != recs[0]._a_iid) fprintf(stderr, "ERROR: '%s' starts with iid "F_U32", but store only up to "F_U64"\n", name, recs[0]._a_iid, info._largestIID); assert(info._largestIID + 1 == recs[0]._a_iid); } while (recsLen > 0) { offm._fileno = recs[recsLen-1]._fileno; // Update location of missing stuff. offm._offset = recs[recsLen-1]._offset; AS_UTL_safeWrite(idx, recs, "ovStore::mergeInfoFiles::offsetsWrite", sizeof(ovStoreOfft), recsLen); recsLen = AS_UTL_safeRead(F, recs, "ovStore::mergeInfoFiles::offsetsReLoad", sizeof(ovStoreOfft), recsMax); } delete [] recs; fclose(F); } // Update info._smallestIID = MIN(info._smallestIID, infopiece._smallestIID); info._largestIID = MAX(info._largestIID, infopiece._largestIID); info._numOverlapsTotal += infopiece._numOverlapsTotal; fprintf(stderr, " Now finished with fragments "F_U64" to "F_U64" -- "F_U64" overlaps.\n", info._smallestIID, info._largestIID, info._numOverlapsTotal); } fclose(idx); // Dump the new store info file { sprintf(name, "%s/info", storePath); errno = 0; FILE *F = fopen(name, "w"); if (errno) fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1); AS_UTL_safeWrite(F, &info, "ovStore::mergeInfoFiles::finalInfo", sizeof(ovStoreInfo), 1); fclose(F); } fprintf(stderr, "\n"); fprintf(stderr, "Index finalized for reads "F_U64" to "F_U64" with "F_U64" overlaps.\n", 
info._smallestIID, info._largestIID, info._numOverlapsTotal); }
void ovStore::ovStore_read(void) { char name[FILENAME_MAX]; sprintf(name, "%s/info", _storePath); errno = 0; FILE *ovsinfo = fopen(name, "r"); if (errno) fprintf(stderr, "ERROR: directory '%s' is not an ovelrapStore; failed to open info file '%s': %s\n", _storePath, name, strerror(errno)), exit(1); AS_UTL_safeRead(ovsinfo, &_info, "ovStore::ovStore::info", sizeof(ovStoreInfo), 1); fclose(ovsinfo); if ((_info._ovsMagic != ovStoreMagic) && (_info._ovsMagic != ovStoreMagicIncomplete)) fprintf(stderr, "ERROR: directory '%s' is not an overlapStore; magic number 0x%016"F_X64P" incorrect.\n", _storePath, _info._ovsMagic), exit(1); if ((_info._ovsMagic != ovStoreMagic) && (_info._ovsMagic != ovStoreMagicIncomplete)) fprintf(stderr, "ERROR: overlapStore '%s' is incomplate; creation crashed?\n", _storePath), exit(1); if (_info._ovsVersion != ovStoreVersion) fprintf(stderr, "ERROR: overlapStore '%s' is version "F_U64"; this code supports only version "F_U64".\n", _storePath, _info._ovsVersion, ovStoreVersion), exit(1); if (_info._maxReadLenInBits != AS_MAX_READLEN_BITS) fprintf(stderr, "ERROR: overlapStore '%s' is for AS_MAX_READLEN_BITS="F_U64"; this code supports only %d bits.\n", _storePath, _info._maxReadLenInBits, AS_MAX_READLEN_BITS), exit(1); // Load stats #if 0 sprintf(name, "%s/statistics", _storePath); errno = 0; FILE *ost = fopen(name, "r"); if (errno) fprintf(stderr, "failed to open the stats file '%s': %s\n", name, strerror(errno)), exit(1); AS_UTL_safeRead(ost, &_stats, "ovStore::ovStore::stats", sizeof(OverlapStoreStats), 1); fclose(ost); #endif // Open the index sprintf(name, "%s/index", _storePath); errno = 0; _offtFile = fopen(name, "r"); if (errno) fprintf(stderr, "ERROR: failed to open offset file '%s': %s\n", name, strerror(errno)), exit(1); // Open erates sprintf(name, "%s/evalues", _storePath); if (AS_UTL_fileExists(name)) { _evaluesMap = new memoryMappedFile(name, memoryMappedFile_readOnly); _evalues = (uint16 *)_evaluesMap->get(0); } //_offtMMap = new 
memoryMappedFile(name, memoryMappedFile_readOnly); //_offts = (ovStoreOfft *)_offtMMap->get(0); //_offtLength = _offtMap->length() / sizeof(ovStoreOfft); }
bool testIndex(char *ovlName, bool doFixes) { char name[FILENAME_MAX]; FILE *I = NULL; FILE *F = NULL; sprintf(name, "%s/index", ovlName); errno = 0; I = fopen(name, "r"); if (errno) fprintf(stderr, "ERROR: Failed to open '%s' for reading: %s\n", name, strerror(errno)), exit(1); //fprintf(stderr, "TESTING '%s'\n", name); if (doFixes) { sprintf(name, "%s/index.fixed", ovlName); errno = 0; F = fopen(name, "w"); if (errno) fprintf(stderr, "ERROR: Failed to open '%s' for writing: %s\n", name, strerror(errno)), exit(1); //fprintf(stderr, "WITH FIXES TO '%s'\n", name); } ovStoreOfft O; uint32 curIID = 0; uint32 minIID = UINT32_MAX; uint32 maxIID = 0; uint32 nErrs = 0; while (1 == AS_UTL_safeRead(I, &O, "offset", sizeof(ovStoreOfft), 1)) { bool maxIncreases = (maxIID < O._a_iid); bool errorDecreased = ((O._a_iid < curIID)); bool errorGap = ((O._a_iid > 0) && (curIID + 1 != O._a_iid)); if (O._a_iid < minIID) minIID = O._a_iid; if (maxIncreases) maxIID = O._a_iid; if (errorDecreased) fprintf(stderr, "ERROR: index decreased from "F_U32" to "F_U32"\n", curIID, O._a_iid), nErrs++; else if (errorGap) fprintf(stderr, "ERROR: gap between "F_U32" and "F_U32"\n", curIID, O._a_iid), nErrs++; if ((maxIncreases == true) && (errorGap == false)) { if (doFixes) AS_UTL_safeWrite(F, &O, "offset", sizeof(ovStoreOfft), 1); } else if (O._numOlaps > 0) { fprintf(stderr, "ERROR: lost overlaps a_iid "F_U32" fileno "F_U32" offset "F_U32" numOlaps "F_U32"\n", O._a_iid, O._fileno, O._offset, O._numOlaps); } curIID = O._a_iid; } fclose(I); if (F) fclose(F); return(nErrs == 0); }
bool BestOverlapGraph::load(const char *prefix, double AS_UTG_ERROR_RATE, double AS_UTG_ERROR_LIMIT) { char name[FILENAME_MAX]; sprintf(name, "%s.bog", prefix); errno = 0; FILE *file = fopen(name, "r"); if (errno) return(false); assert(_best5 != NULL); assert(_best3 != NULL); assert(_bestC != NULL); uint64 magicNumber; uint64 versionNumber; AS_UTL_safeRead(file, &magicNumber, "magicnumber", sizeof(uint64), 1); AS_UTL_safeRead(file, &versionNumber, "versionnumber", sizeof(uint64), 1); if (magicNumber != ogMagicNumber) { fprintf(logFile, "BestOverlapGraph()-- File '%s' is not a best overlap graph; cannot load graph.\n", name); fclose(file); return(false); } if (versionNumber != ogVersionNumber) { fprintf(logFile, "BestOverlapGraph()-- File '%s' is version "F_U64", I can only read version "F_U64"; cannot load graph.\n", name, versionNumber, ogVersionNumber); fclose(file); return(false); } fprintf(logFile, "BestOverlapGraph()-- Loading overlap graph from '%s'.\n", name); double eRate = 0.0; double eLimit = 0.0; AS_UTL_safeRead(file, &eRate, "errorRate", sizeof(double), 1); AS_UTL_safeRead(file, &eLimit, "errorLimit", sizeof(double), 1); if (eRate != AS_UTG_ERROR_RATE) fprintf(logFile, "BestOverlapGraph()-- Saved graph in '%s' has error rate %f, this run is expecting error rate %f; cannot load graph.\n", name, eRate, AS_UTG_ERROR_RATE); if (eLimit != AS_UTG_ERROR_LIMIT) fprintf(logFile, "BestOverlapGraph()-- Saved graph in '%s' has error limit %f, this run is expecting error limit %f; cannot load graph.\n", name, eLimit, AS_UTG_ERROR_LIMIT); if ((eRate != AS_UTG_ERROR_RATE) || (eLimit != AS_UTG_ERROR_LIMIT)) { fclose(file); return(false); } AS_UTL_safeRead(file, _best5, "best overlaps", sizeof(BestEdgeOverlap), FI->numFragments() + 1); AS_UTL_safeRead(file, _best3, "best overlaps", sizeof(BestEdgeOverlap), FI->numFragments() + 1); AS_UTL_safeRead(file, _bestC, "best contains", sizeof(BestContainment), FI->numFragments() + 1); for (uint32 i=0; i<FI->numFragments() + 1; 
i++) { if (_bestC[i].olapsLen > 0) { _bestC[i].olaps = new uint32 [_bestC[i].olapsLen]; AS_UTL_safeRead(file, _bestC[i].olaps, "best contains olaps", sizeof(uint32), _bestC[i].olapsLen); } else { assert(_bestC[i].olaps == NULL); } } fclose(file); return(true); }
// The N valid modes for a 'new gkpStore' call: // // 1) Add new reads/libraries, modify old ones. gkStore(path, true, true) // 2) No addition, but can modify old ones. gkStore(path, true) // 3) No addition, no modification. gkStore(path); // gkStore::gkStore(char const *path, gkStore_mode mode, uint32 partID) { char name[FILENAME_MAX]; memset(_storePath, 0, sizeof(char) * FILENAME_MAX); memset(_storeName, 0, sizeof(char) * FILENAME_MAX); strcpy(_storePath, path); strcpy(_storeName, path); // Broken. sprintf(name, "%s/info", _storePath); // If the info file exists, load it. if (AS_UTL_fileExists(name, false, false) == true) { errno = 0; FILE *I = fopen(name, "r"); AS_UTL_safeRead(I, &_info, "gkStore::_info", sizeof(gkStoreInfo), 1); fclose(I); } // Check sizes are correct. uint32 failed = 0; if (_info.gkLibrarySize != sizeof(gkLibrary)) failed += fprintf(stderr, "ERROR: gkLibrary size in store = %u, differs from executable = %u\n", _info.gkLibrarySize, sizeof(gkLibrary)); if (_info.gkReadSize != sizeof(gkRead)) failed += fprintf(stderr, "ERROR: gkRead size in store = %u, differs from executable = %u\n", _info.gkReadSize, sizeof(gkRead)); if (_info.gkMaxLibrariesBits != AS_MAX_LIBRARIES_BITS) failed += fprintf(stderr, "ERROR: AS_MAX_LIBRARIES_BITS in store = %u, differs from executable = %u\n", _info.gkMaxLibrariesBits, AS_MAX_LIBRARIES_BITS); if (_info.gkLibraryNameSize != LIBRARY_NAME_SIZE) failed += fprintf(stderr, "ERROR: LIBRARY_NAME_SIZE in store = %u, differs from executable = %u\n", _info.gkLibraryNameSize, LIBRARY_NAME_SIZE); if (_info.gkMaxReadBits != AS_MAX_READS_BITS) failed += fprintf(stderr, "ERROR: AS_MAX_READS_BITS in store = %u, differs from executable = %u\n", _info.gkMaxReadBits, AS_MAX_READS_BITS); if (_info.gkMaxReadLenBits != AS_MAX_READLEN_BITS) failed += fprintf(stderr, "ERROR: AS_MAX_READLEN_BITS in store = %u, differs from executable = %u\n", _info.gkMaxReadLenBits, AS_MAX_READLEN_BITS); if (failed) fprintf(stderr, "ERROR:\nERROR: Can't 
open store '%s': parameters in src/AS_global.H are incompatible with the store.\n", _storePath), exit(1); assert(_info.gkLibrarySize == sizeof(gkLibrary)); assert(_info.gkReadSize == sizeof(gkRead)); assert(_info.gkMaxLibrariesBits == AS_MAX_LIBRARIES_BITS); assert(_info.gkLibraryNameSize == LIBRARY_NAME_SIZE); assert(_info.gkMaxReadBits == AS_MAX_READS_BITS); assert(_info.gkMaxReadLenBits == AS_MAX_READLEN_BITS); // Clear ourself, to make valgrind happier. _librariesMMap = NULL; _librariesAlloc = 0; _libraries = NULL; _readsMMap = NULL; _readsAlloc = 0; _reads = NULL; _blobsMMap = NULL; _blobs = NULL; _blobsFile = NULL; _mode = mode; _numberOfPartitions = 0; _partitionID = 0; _readIDtoPartitionIdx = NULL; _readIDtoPartitionID = NULL; _readsPerPartition = NULL; //_readsInThisPartition = NULL; // // READ ONLY // if ((mode == gkStore_readOnly) && (partID == UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' for read-only access.\n", _storePath); if (AS_UTL_fileExists(_storePath, true, false) == false) { fprintf(stderr, "gkStore()-- failed to open '%s' for read-only access: store doesn't exist.\n", _storePath); exit(1); } sprintf(name, "%s/libraries", _storePath); _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _libraries = (gkLibrary *)_librariesMMap->get(0); sprintf(name, "%s/reads", _storePath); _readsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _reads = (gkRead *)_readsMMap->get(0); sprintf(name, "%s/blobs", _storePath); _blobsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _blobs = (void *)_blobsMMap->get(0); } // // MODIFY, NO APPEND (also for building a partitioned store) // else if ((mode == gkStore_modify) && (partID == UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' for read-write access.\n", _storePath); if (AS_UTL_fileExists(_storePath, true, false) == false) { fprintf(stderr, "gkStore()-- failed to open '%s' for read-write access: store doesn't exist.\n", _storePath); exit(1); } 
sprintf(name, "%s/libraries", _storePath); _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readWrite); _libraries = (gkLibrary *)_librariesMMap->get(0); sprintf(name, "%s/reads", _storePath); _readsMMap = new memoryMappedFile (name, memoryMappedFile_readWrite); _reads = (gkRead *)_readsMMap->get(0); sprintf(name, "%s/blobs", _storePath); _blobsMMap = new memoryMappedFile (name, memoryMappedFile_readWrite); _blobs = (void *)_blobsMMap->get(0); } // // MODIFY, APPEND, open mmap'd files, but copy them entirely to local memory // else if ((mode == gkStore_extend) && (partID == UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' for read-write and append access.\n", _storePath); if (AS_UTL_fileExists(_storePath, true, true) == false) AS_UTL_mkdir(_storePath); _librariesAlloc = MAX(64, 2 * _info.numLibraries); _libraries = new gkLibrary [_librariesAlloc]; sprintf(name, "%s/libraries", _storePath); if (AS_UTL_fileExists(name, false, false) == true) { _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); memcpy(_libraries, _librariesMMap->get(0), sizeof(gkLibrary) * (_info.numLibraries + 1)); delete _librariesMMap; _librariesMMap = NULL;; } _readsAlloc = MAX(128, 2 * _info.numReads); _reads = new gkRead [_readsAlloc]; sprintf(name, "%s/reads", _storePath); if (AS_UTL_fileExists(name, false, false) == true) { _readsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); memcpy(_reads, _readsMMap->get(0), sizeof(gkRead) * (_info.numReads + 1)); delete _readsMMap; _readsMMap = NULL; } sprintf(name, "%s/blobs", _storePath); _blobsMMap = NULL; _blobs = NULL; errno = 0; _blobsFile = fopen(name, "a+"); if (errno) fprintf(stderr, "gkStore()-- Failed to open blobs file '%s' for appending: %s\n", name, strerror(errno)), exit(1); } // // PARTITIONED, no modifications, no appends // // BIG QUESTION: do we want to partition the read metadata too, or is it small enough // to load in every job? For now, we load all the metadata. 
else if ((mode == gkStore_readOnly) && (partID != UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' partition '%u' for read-only access.\n", _storePath, partID); // For partitioned reads, we need to have a uint32 map of readID to partitionReadID so we can // lookup the metadata in the partitoned _reads data. This is 4 bytes per read, compared to 24 // bytes for the full meta data. Assuming 100x of 3kb read coverage on human, that's 100 // million reads, so 0.400 GB vs 2.4 GB. sprintf(name, "%s/partitions/map", _storePath); errno = 0; FILE *F = fopen(name, "r"); if (errno) fprintf(stderr, "gkStore::gkStore()-- failed to open '%s' for reading: %s\n", name, strerror(errno)), exit(1); AS_UTL_safeRead(F, &_numberOfPartitions, "gkStore::_numberOfPartitions", sizeof(uint32), 1); _partitionID = partID; _readsPerPartition = new uint32 [_numberOfPartitions + 1]; // No zeroth element in any of these _readIDtoPartitionID = new uint32 [gkStore_getNumReads() + 1]; _readIDtoPartitionIdx = new uint32 [gkStore_getNumReads() + 1]; AS_UTL_safeRead(F, _readsPerPartition, "gkStore::_readsPerPartition", sizeof(uint32), _numberOfPartitions + 1); AS_UTL_safeRead(F, _readIDtoPartitionID, "gkStore::_readIDtoPartitionID", sizeof(uint32), gkStore_getNumReads() + 1); AS_UTL_safeRead(F, _readIDtoPartitionIdx, "gkStore::_readIDtoPartitionIdx", sizeof(uint32), gkStore_getNumReads() + 1); fclose(F); sprintf(name, "%s/libraries", _storePath); _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _libraries = (gkLibrary *)_librariesMMap->get(0); //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _libraries); sprintf(name, "%s/partitions/reads.%04"F_U32P"", _storePath, partID); _readsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _reads = (gkRead *)_readsMMap->get(0); //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _reads); sprintf(name, "%s/partitions/blobs.%04"F_U32P"", _storePath, partID); _blobsMMap = new memoryMappedFile (name, 
memoryMappedFile_readOnly); _blobs = (void *)_blobsMMap->get(0); //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _blobs); } // Info only, no access to reads or libraries. else if (mode == gkStore_infoOnly) { //fprintf(stderr, "gkStore()-- opening '%s' for info-only access.\n", _storePath); } else { fprintf(stderr, "gkStore::gkStore()-- invalid mode '%s' with partition ID %u.\n", toString(mode), partID); assert(0); } }