void MateLocation::dumpHappiness(const char *prefix, const char *name) { char dirname[FILENAME_MAX] = {0}; char outname[FILENAME_MAX] = {0}; sprintf(dirname, "%s.%03u.%s.mateHappiness", prefix, logFileOrder, name); sprintf(outname, "%s.%03u.%s.mateHappiness/utg%09u.mateHappiness", prefix, logFileOrder, name, _tig->id()); if (AS_UTL_fileExists(dirname, TRUE, TRUE) == 0) AS_UTL_mkdir(dirname); FILE *F = fopen(outname, "w"); for (int32 i=0; i<_tigLen; i++) fprintf(F, "%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", i, good[i], badFwd[i], badRev[i], badExternalFwd[i], badExternalRev[i], badCompressed[i], badStretched[i], badNormal[i], badAnti[i], badOuttie[i]); fclose(F); }
/*
 * Save the walk statistics of one scaffold into the "stats" directory:
 *
 *   stats/gapsInScaffold.<scaffoldID>.stat  -- s->GapStats, written by
 *                                              CopyToFileVA_GapStatisticsT()
 *   stats/scaffold.<scaffoldID>.stat        -- the text "<scaffoldID> <insertedChunks>"
 *
 * If either file cannot be opened, an error is printed to stderr and the
 * function returns without writing the remaining output.
 */
void store_scaffold_walk_statistics(ScaffoldWalkStatisticsT *s) {
  char  filename[200];
  FILE *output;

  /* Both files below are written under "stats/", so that is the directory
     that has to exist. */
  AS_UTL_mkdir("stats");

  /* Gap statistics of this scaffold. */
  snprintf(filename, sizeof(filename), "stats/gapsInScaffold.%d.stat", s->scaffoldID);
  output = fopen(filename, "w");
  if (output == NULL) {
    fprintf(stderr,"=== ERROR : Could not open gap statistics file for writing in scaffold %d\n",s->scaffoldID);
    return;
  }
  CopyToFileVA_GapStatisticsT(s->GapStats, output);
  fclose(output);

  /* Scaffold summary: id and number of inserted chunks. */
  snprintf(filename, sizeof(filename), "stats/scaffold.%d.stat", s->scaffoldID);
  output = fopen(filename, "w");
  if (output == NULL) {
    fprintf(stderr,"=== ERROR : Could not open scaffold statistics file for writing in scaffold %d\n",s->scaffoldID);
    return;
  }
  fprintf(output, "%d %d", s->scaffoldID, s->insertedChunks);
  fclose(output);
}
void ovStore::ovStore_write(void) { AS_UTL_mkdir(_storePath); char name[FILENAME_MAX]; sprintf(name, "%s/info", _storePath); // If the ovs file exists, AND has a valid magic number, then the store is complete and we should // abort before the valid store is destroyed. if (AS_UTL_fileExists(name, false, false)) { errno = 0; FILE *ovsinfo = fopen(name, "r"); if (errno) { fprintf(stderr, "ERROR: failed to read store metadata from '%s': %s\n", name, strerror(errno)); exit(1); } AS_UTL_safeRead(ovsinfo, &_info, "ovStore::ovStore::testinfo", sizeof(ovStoreInfo), 1); fclose(ovsinfo); if (_info._ovsMagic == ovStoreMagic) fprintf(stderr, "ERROR: overlapStore '%s' is a valid overlap store, will not overwrite.\n", _storePath), exit(1); } // Create a new incomplete info file. errno = 0; FILE *ovsinfo = fopen(name, "w"); if (errno) fprintf(stderr, "failed to create overlap store '%s': %s\n", _storePath, strerror(errno)), exit(1); AS_UTL_safeWrite(ovsinfo, &_info, "ovStore::ovStore::saveinfo", sizeof(ovStoreInfo), 1); fclose(ovsinfo); sprintf(name, "%s/index", _storePath); errno = 0; _offtFile = fopen(name, "w"); if (errno) fprintf(stderr, "AS_OVS_createOverlapStore()-- failed to open offset file '%s': %s\n", name, strerror(errno)), exit(1); _overlapsThisFile = 0; _currentFileIndex = 0; _bof = NULL; }
int main(int argc, char **argv) { int32 minEvalue = 0; int32 maxEvalue = 0; int32 step = 1; char D[FILENAME_MAX]; char O[FILENAME_MAX]; if (argc == 2) { minEvalue = atoi(argv[1]); maxEvalue = minEvalue; } else if (argc == 3) { minEvalue = atoi(argv[1]); maxEvalue = atoi(argv[2]); } else if (argc == 4) { minEvalue = atoi(argv[1]); maxEvalue = atoi(argv[2]); step = atoi(argv[3]); } else { fprintf(stderr, "usage: %s minEvalue [maxEvalue [step]]\n", argv[0]); fprintf(stderr, " computes overlapper probabilities for minEvalue <= eValue <= maxEvalue'\n"); fprintf(stderr, " eValue 100 == 0.01 fraction error == 1%% error\n"); exit(1); } fprintf(stderr, "Computing Edit_Match_Limit data for reads of length %ubp (bits = %u).\n", AS_MAX_READLEN, AS_MAX_READLEN_BITS); sprintf(D, "prefixEditDistance-matchLimitData-BITS=%01d", AS_MAX_READLEN_BITS); AS_UTL_mkdir(D); #pragma omp parallel for schedule(dynamic, 1) for (int32 evalue=maxEvalue; evalue>=minEvalue; evalue -= step) { char N[FILENAME_MAX]; // Local to this thread! 
double erate = evalue / 10000.0; int32 start = 1; int32 MAX_ERRORS = (1 + (int) (erate * AS_MAX_READLEN)); int32 ERRORS_FOR_FREE = 1; int32 *starts = new int32 [MAX_ERRORS + 1]; memset(starts, 0, sizeof(int32) * (MAX_ERRORS + 1)); sprintf(N, "%s/prefixEditDistance-matchLimit-%04d.bin", D, evalue); if (AS_UTL_fileExists(N)) { fprintf(stderr, "eValue %04d -- eRate %6.4f -- %7.4f%% error -- %8d values -- thread %2d - LOAD\n", evalue, erate, erate * 100.0, MAX_ERRORS, omp_get_thread_num()); errno = 0; FILE *F = fopen(N, "r"); if (errno) fprintf(stderr, "Failed to open '%s' for reading: %s\n", N, strerror(errno)), exit(1); int32 me = 0; double er = 0.0; fread(&me, sizeof(int32), 1, F); fread(&er, sizeof(double), 1, F); fread( starts, sizeof(int32), MAX_ERRORS, F); assert(me == MAX_ERRORS); assert(er == erate); fclose(F); } else { fprintf(stderr, "eValue %04d -- eRate %6.4f -- %7.4f%% error -- %8d values -- thread %2d - COMPUTE\n", evalue, erate, erate * 100.0, MAX_ERRORS, omp_get_thread_num()); for (int32 e=ERRORS_FOR_FREE + 1; e<MAX_ERRORS; e++) { start = Binomial_Bound(e - ERRORS_FOR_FREE, erate, start); starts[e] = start - 1; } } { sprintf(O, "%s/prefixEditDistance-matchLimit-%04d.bin", D, evalue); errno = 0; FILE *F = fopen(O, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for writing: %s\n", N, strerror(errno)), exit(1); fwrite(&MAX_ERRORS, sizeof(int32), 1, F); fwrite(&erate, sizeof(double), 1, F); fwrite( starts, sizeof(int32), MAX_ERRORS, F); fclose(F); } { sprintf(O, "%s/prefixEditDistance-matchLimit-%04d.dat", D, evalue); errno = 0; FILE *F = fopen(O, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for writing: %s\n", N, strerror(errno)), exit(1); fprintf(F, "#length limit slope0toX slopeXtoMAX for erate=%0.4f MAX_ERRORS=%d\n", erate, MAX_ERRORS); for (uint32 mm=MAX_ERRORS-1, ii=1; ii<MAX_ERRORS; ii++) fprintf(F, "%-8d %8d %11.6f %11.6f\n", ii, starts[ii], (double)(starts[ii] - starts[1]) / (ii - 1 + 1), (double)(starts[mm] - starts[ii]) / (mm - 
ii + 1)); fclose(F); } { sprintf(O, "%s/prefixEditDistance-matchLimit-%04d.C", D, evalue); errno = 0; FILE *F = fopen(O, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for writing: %s\n", N, strerror(errno)), exit(1); fprintf(F, "//\n"); fprintf(F, "// Automagically generated. Do not edit.\n"); fprintf(F, "//\n"); fprintf(F, "\n"); fprintf(F, "#include \"gkStore.H\"\n"); fprintf(F, "\n"); fprintf(F, "#if (AS_MAX_READLEN_BITS == %d)\n", AS_MAX_READLEN_BITS); fprintf(F, "\n"); fprintf(F, "extern\n"); fprintf(F, "const\n"); fprintf(F, "int32\n"); fprintf(F, "Edit_Match_Limit_%04d[%d] = {\n", evalue, MAX_ERRORS + 1); uint32 i=0; while (i < MAX_ERRORS) { uint32 j=0; fprintf(F, " "); while ((j < 16) && (i < MAX_ERRORS)) { if (i < MAX_ERRORS-1) fprintf(F, "0x%08x,", starts[i]); else fprintf(F, "0x%08x", starts[i]); i++; j++; } fprintf(F, "\n"); } fprintf(F, "};\n"); fprintf(F, "\n"); fprintf(F, "#endif\n"); fclose(F); } } }
/*
 * Write the combined celagram (.cgm) files describing the gaps of all
 * scaffolds recorded in ws->ScaffoldStats.  Eight files are created in the
 * "stats" directory, each starting with a one-line title followed by
 * space-separated integers:
 *
 *   walked gaps    -- walkedChunks goes to the negativ/small/big hops file
 *                     selected by gapLength.mean, and to the all-hops file;
 *                     the truncated gapLength.mean goes to the gap length file.
 *   unwalked gaps  -- the truncated gapEstimate.mean goes to the gap estimate
 *                     file; bestTooShort / bestTooLong go to their files when
 *                     the matching flag is set.
 *
 * Scaffolds whose scaffoldID is NO_SCAFFOLD are skipped.
 */
void output_combined_celagram_files(WalkStatisticsT* ws) {
  /* Index of each output file in the tables below. */
  enum {
    CGM_NEGATIV_HOPS = 0,
    CGM_SMALL_HOPS,
    CGM_BIG_HOPS,
    CGM_ALL_HOPS,
    CGM_GAP_LENGTH,
    CGM_GAP_ESTIMATE,
    CGM_TOO_SHORT,
    CGM_TOO_LONG,
    CGM_NUM_FILES
  };

  static const char *cgmPath[CGM_NUM_FILES] = {
    "stats/number.negativ.hops.cgm",
    "stats/number.small.hops.cgm",
    "stats/number.big.hops.cgm",
    "stats/number.hops.cgm",
    "stats/number.gap.length.cgm",
    "stats/number.gap.estimate.cgm",
    "stats/number.too.short.misses.cgm",
    "stats/number.too.long.misses.cgm"
  };

  static const char *cgmTitle[CGM_NUM_FILES] = {
    "Number of hops in negativ gaps\n",
    "Number of hops in small gaps\n",
    "Number of hops in big gaps\n",
    "Number of hops in all gaps\n",
    "Gap lengths in all walked gaps\n",
    "Gap estimates in all unwalked gaps\n",
    "Misses of walks were we could walk too short\n",
    "Misses of walks were we could walk too long\n"
  };

  FILE *cgm[CGM_NUM_FILES];
  char  statFileName[256];
  int   which;
  int   scaffIdx;

  for (which = 0; which < CGM_NUM_FILES; which++)
    cgm[which] = NULL;

  /* The output directory has to exist before any file is opened. */
  AS_UTL_mkdir("stats");

  /* Open every celagram file and write its title line. */
  for (which = 0; which < CGM_NUM_FILES; which++) {
    sprintf(statFileName, "%s", cgmPath[which]);
    cgm[which] = file_open(statFileName, "w");
    assert(NULL != cgm[which]);
    fprintf(cgm[which], "%s", cgmTitle[which]);
  }

  for (scaffIdx = 0; scaffIdx < GetNumScaffoldWalkStatisticsTs(ws->ScaffoldStats); scaffIdx++) {
    ScaffoldWalkStatisticsT *scaffold = GetScaffoldWalkStatisticsT(ws->ScaffoldStats, scaffIdx);
    int gapIdx;

    if (scaffold->scaffoldID == NO_SCAFFOLD)
      continue;

    for (gapIdx = 0; gapIdx < GetNumGapStatisticsTs(scaffold->GapStats); gapIdx++) {
      GapStatisticsT *gap = GetGapStatisticsT(scaffold->GapStats, gapIdx);
      FILE           *hopsBySize;

      /* Gaps that were not walked only contribute estimates and misses. */
      if (gap->flags.bits.walked != TRUE) {
        fprintf(cgm[CGM_GAP_ESTIMATE], "%d ", (int) gap->gapEstimate.mean);

        if (gap->flags.bits.walkedTooShort)
          fprintf(cgm[CGM_TOO_SHORT], "%d ", (int) gap->bestTooShort);

        if (gap->flags.bits.walkedTooLong)
          fprintf(cgm[CGM_TOO_LONG], "%d ", (int) gap->bestTooLong);

        continue;
      }

      /* Walked gap: first the size-dependent file ... */
      if (gap->gapLength.mean < 0.0)
        hopsBySize = cgm[CGM_NEGATIV_HOPS];
      else if (gap->gapLength.mean < SWITCH_THRESHOLD)
        hopsBySize = cgm[CGM_SMALL_HOPS];
      else
        hopsBySize = cgm[CGM_BIG_HOPS];

      fprintf(hopsBySize, "%d ", gap->walkedChunks);

      /* ... then the files every walked gap contributes to. */
      fprintf(cgm[CGM_ALL_HOPS],   "%d ", gap->walkedChunks);
      fprintf(cgm[CGM_GAP_LENGTH], "%d ", (int) gap->gapLength.mean);
    }
  }

  /* Close everything; the too-long file is closed before the too-short one. */
  for (which = CGM_NEGATIV_HOPS; which <= CGM_GAP_ESTIMATE; which++)
    fclose(cgm[which]);

  fclose(cgm[CGM_TOO_LONG]);
  fclose(cgm[CGM_TOO_SHORT]);
}
//  Build a partitioned copy of the store under '<_storePath>/partitions'.
//
//  partitionMap[fi] is the partition assigned to read fi, for fi in [1, gkStore_getNumReads()];
//  UINT32_MAX means the read is not assigned to any partition.  partitionMap[0] must be
//  UINT32_MAX and no read may be assigned to partition zero (both are asserted).
//
//  For every partition p in [1, max partition] two files are written:
//    partitions/blobs.<p>  -- blob data, copied by gkRead_copyDataToPartition()
//    partitions/reads.<p>  -- the gkRead records of the reads in p, in read ID order
//
//  and a single 'partitions/map' file holding, in order:
//    uint32  maxPartition
//    uint32  readfileslen[maxPartition + 1]   number of reads in each partition
//    uint32  partitionMap[numReads + 1]       read ID -> partition
//    uint32  readIDmap[numReads + 1]          read ID -> index within its partition's reads file
//
//  The store must be open read-only and not already partitioned (asserted).  Failure to open
//  any output file is fatal.
//
void
gkStore::gkStore_buildPartitions(uint32 *partitionMap) {
  char  name[FILENAME_MAX];

  //  Store cannot be partitioned already, and it must be readOnly (for safety) as we don't need
  //  to be changing any of the normal store data.
  assert(_numberOfPartitions == 0);
  assert(_mode               == gkStore_readOnly);

  //  Figure out what the last partition is

  uint32  maxPartition  = 0;
  uint32  unPartitioned = 0;

  assert(partitionMap[0] == UINT32_MAX);

  for (uint32 fi=1; fi<=gkStore_getNumReads(); fi++) {
    if      (partitionMap[fi] == UINT32_MAX)
      unPartitioned++;

    else if (maxPartition < partitionMap[fi])
      maxPartition = partitionMap[fi];
  }

  fprintf(stderr, "Found " F_U32 " unpartitioned reads and maximum partition of " F_U32 "\n",
          unPartitioned, maxPartition);

  //  Create the partitions by opening N copies of the data stores,
  //  and writing data to each.

  FILE   **blobfiles    = new FILE * [maxPartition + 1];
  uint64  *blobfileslen = new uint64 [maxPartition + 1];            //  Offset, in bytes, into the blobs file
  FILE   **readfiles    = new FILE * [maxPartition + 1];
  uint32  *readfileslen = new uint32 [maxPartition + 1];            //  aka _readsPerPartition
  uint32  *readIDmap    = new uint32 [gkStore_getNumReads() + 1];   //  aka _readIDtoPartitionIdx

  //  Be nice and put all the partitions in a subdirectory.

  snprintf(name, sizeof(name), "%s/partitions", _storePath);

  if (AS_UTL_fileExists(name, true, true) == false)
    AS_UTL_mkdir(name);

  //  Open all the output files -- fail early if we can't open that many files.  Success is
  //  judged by the returned stream, not by errno.

  blobfiles[0]    = NULL;
  blobfileslen[0] = UINT64_MAX;
  readfiles[0]    = NULL;
  readfileslen[0] = UINT32_MAX;

  for (uint32 i=1; i<=maxPartition; i++) {
    snprintf(name, sizeof(name), "%s/partitions/blobs.%04u", _storePath, i);

    blobfiles[i]    = fopen(name, "w");
    blobfileslen[i] = 0;

    if (blobfiles[i] == NULL)
      fprintf(stderr, "gkStore::gkStore_buildPartitions()-- ERROR: failed to open partition %u file '%s' for write: %s\n",
              i, name, strerror(errno)), exit(1);

    snprintf(name, sizeof(name), "%s/partitions/reads.%04u", _storePath, i);

    readfiles[i]    = fopen(name, "w");
    readfileslen[i] = 0;

    if (readfiles[i] == NULL)
      fprintf(stderr, "gkStore::gkStore_buildPartitions()-- ERROR: failed to open partition %u file '%s' for write: %s\n",
              i, name, strerror(errno)), exit(1);
  }

  //  Open the output partition map file -- we might as well fail early if we can't make it also.

  snprintf(name, sizeof(name), "%s/partitions/map", _storePath);

  FILE *rIDmF = fopen(name, "w");

  if (rIDmF == NULL)
    fprintf(stderr, "gkStore::gkStore_buildPartitions()-- ERROR: failed to open partition map file '%s': %s\n",
            name, strerror(errno)), exit(1);

  //  Copy the blob from the master file to the partitioned file, update pointers.

  readIDmap[0] = UINT32_MAX;    //  There isn't a zeroth read, make it bogus.

  for (uint32 fi=1; fi<=gkStore_getNumReads(); fi++) {
    uint32  pi = partitionMap[fi];

    assert(pi != 0);  //  No zeroth partition, right?

    //  Deleted reads are not assigned a partition; skip them.  Their map entry still has to be
    //  given a value, because the whole readIDmap array is written to the map file below.
    if (pi == UINT32_MAX) {
      readIDmap[fi] = UINT32_MAX;
      continue;
    }

    //  Make a copy of the read, then modify it for the partition, then write it to the partition.
    //  Without the copy, we'd need to update the master record too.

    gkRead  partRead = _reads[fi];  //*gkStore_getRead(fi);

    partRead.gkRead_copyDataToPartition(_blobs, blobfiles, blobfileslen, pi);

#if 1
    fprintf(stderr, "read " F_U32 "=" F_U32 " len " F_U32 " -- blob master " F_U64 " -- to part " F_U32 " new read id " F_U32 " blob " F_U64 "/" F_U64 " -- at readIdx " F_U32 "\n",
            fi, _reads[fi].gkRead_readID(), _reads[fi].gkRead_sequenceLength(),
            _reads[fi]._mPtr,
            pi,
            partRead.gkRead_readID(), partRead._pID, partRead._mPtr,
            readfileslen[pi]);
#endif

    AS_UTL_safeWrite(readfiles[pi], &partRead, "gkStore::gkStore_buildPartitions::read", sizeof(gkRead), 1);

    //  The read's index within its partition is the number of reads written there before it.
    readIDmap[fi] = readfileslen[pi]++;
  }

  //  There isn't a zeroth read.

  AS_UTL_safeWrite(rIDmF, &maxPartition, "gkStore::gkStore_buildPartitions::maxPartition", sizeof(uint32), 1);
  AS_UTL_safeWrite(rIDmF,  readfileslen, "gkStore::gkStore_buildPartitions::readfileslen", sizeof(uint32), maxPartition + 1);
  AS_UTL_safeWrite(rIDmF,  partitionMap, "gkStore::gkStore_buildPartitions::partitionMap", sizeof(uint32), gkStore_getNumReads() + 1);
  AS_UTL_safeWrite(rIDmF,  readIDmap,    "gkStore::gkStore_buildPartitions::readIDmap",    sizeof(uint32), gkStore_getNumReads() + 1);

  //  cleanup -- close all the files, delete storage

  fclose(rIDmF);

  for (uint32 i=1; i<=maxPartition; i++) {
    fprintf(stderr, "partition " F_U32 " has " F_U32 " reads\n", i, readfileslen[i]);

    //  A failing close can mean buffered data never reached the file; warn, but keep going.
    if (fclose(blobfiles[i]) != 0)
      fprintf(stderr, " warning: %s\n", strerror(errno));

    if (fclose(readfiles[i]) != 0)
      fprintf(stderr, " warning: %s\n", strerror(errno));
  }

  delete [] readIDmap;
  delete [] readfileslen;
  delete [] readfiles;
  delete [] blobfileslen;
  delete [] blobfiles;
}
// The N valid modes for a 'new gkpStore' call: // // 1) Add new reads/libraries, modify old ones. gkStore(path, true, true) // 2) No addition, but can modify old ones. gkStore(path, true) // 3) No addition, no modification. gkStore(path); // gkStore::gkStore(char const *path, gkStore_mode mode, uint32 partID) { char name[FILENAME_MAX]; memset(_storePath, 0, sizeof(char) * FILENAME_MAX); memset(_storeName, 0, sizeof(char) * FILENAME_MAX); strcpy(_storePath, path); strcpy(_storeName, path); // Broken. sprintf(name, "%s/info", _storePath); // If the info file exists, load it. if (AS_UTL_fileExists(name, false, false) == true) { errno = 0; FILE *I = fopen(name, "r"); AS_UTL_safeRead(I, &_info, "gkStore::_info", sizeof(gkStoreInfo), 1); fclose(I); } // Check sizes are correct. uint32 failed = 0; if (_info.gkLibrarySize != sizeof(gkLibrary)) failed += fprintf(stderr, "ERROR: gkLibrary size in store = %u, differs from executable = %u\n", _info.gkLibrarySize, sizeof(gkLibrary)); if (_info.gkReadSize != sizeof(gkRead)) failed += fprintf(stderr, "ERROR: gkRead size in store = %u, differs from executable = %u\n", _info.gkReadSize, sizeof(gkRead)); if (_info.gkMaxLibrariesBits != AS_MAX_LIBRARIES_BITS) failed += fprintf(stderr, "ERROR: AS_MAX_LIBRARIES_BITS in store = %u, differs from executable = %u\n", _info.gkMaxLibrariesBits, AS_MAX_LIBRARIES_BITS); if (_info.gkLibraryNameSize != LIBRARY_NAME_SIZE) failed += fprintf(stderr, "ERROR: LIBRARY_NAME_SIZE in store = %u, differs from executable = %u\n", _info.gkLibraryNameSize, LIBRARY_NAME_SIZE); if (_info.gkMaxReadBits != AS_MAX_READS_BITS) failed += fprintf(stderr, "ERROR: AS_MAX_READS_BITS in store = %u, differs from executable = %u\n", _info.gkMaxReadBits, AS_MAX_READS_BITS); if (_info.gkMaxReadLenBits != AS_MAX_READLEN_BITS) failed += fprintf(stderr, "ERROR: AS_MAX_READLEN_BITS in store = %u, differs from executable = %u\n", _info.gkMaxReadLenBits, AS_MAX_READLEN_BITS); if (failed) fprintf(stderr, "ERROR:\nERROR: Can't 
open store '%s': parameters in src/AS_global.H are incompatible with the store.\n", _storePath), exit(1); assert(_info.gkLibrarySize == sizeof(gkLibrary)); assert(_info.gkReadSize == sizeof(gkRead)); assert(_info.gkMaxLibrariesBits == AS_MAX_LIBRARIES_BITS); assert(_info.gkLibraryNameSize == LIBRARY_NAME_SIZE); assert(_info.gkMaxReadBits == AS_MAX_READS_BITS); assert(_info.gkMaxReadLenBits == AS_MAX_READLEN_BITS); // Clear ourself, to make valgrind happier. _librariesMMap = NULL; _librariesAlloc = 0; _libraries = NULL; _readsMMap = NULL; _readsAlloc = 0; _reads = NULL; _blobsMMap = NULL; _blobs = NULL; _blobsFile = NULL; _mode = mode; _numberOfPartitions = 0; _partitionID = 0; _readIDtoPartitionIdx = NULL; _readIDtoPartitionID = NULL; _readsPerPartition = NULL; //_readsInThisPartition = NULL; // // READ ONLY // if ((mode == gkStore_readOnly) && (partID == UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' for read-only access.\n", _storePath); if (AS_UTL_fileExists(_storePath, true, false) == false) { fprintf(stderr, "gkStore()-- failed to open '%s' for read-only access: store doesn't exist.\n", _storePath); exit(1); } sprintf(name, "%s/libraries", _storePath); _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _libraries = (gkLibrary *)_librariesMMap->get(0); sprintf(name, "%s/reads", _storePath); _readsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _reads = (gkRead *)_readsMMap->get(0); sprintf(name, "%s/blobs", _storePath); _blobsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _blobs = (void *)_blobsMMap->get(0); } // // MODIFY, NO APPEND (also for building a partitioned store) // else if ((mode == gkStore_modify) && (partID == UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' for read-write access.\n", _storePath); if (AS_UTL_fileExists(_storePath, true, false) == false) { fprintf(stderr, "gkStore()-- failed to open '%s' for read-write access: store doesn't exist.\n", _storePath); exit(1); } 
sprintf(name, "%s/libraries", _storePath); _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readWrite); _libraries = (gkLibrary *)_librariesMMap->get(0); sprintf(name, "%s/reads", _storePath); _readsMMap = new memoryMappedFile (name, memoryMappedFile_readWrite); _reads = (gkRead *)_readsMMap->get(0); sprintf(name, "%s/blobs", _storePath); _blobsMMap = new memoryMappedFile (name, memoryMappedFile_readWrite); _blobs = (void *)_blobsMMap->get(0); } // // MODIFY, APPEND, open mmap'd files, but copy them entirely to local memory // else if ((mode == gkStore_extend) && (partID == UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' for read-write and append access.\n", _storePath); if (AS_UTL_fileExists(_storePath, true, true) == false) AS_UTL_mkdir(_storePath); _librariesAlloc = MAX(64, 2 * _info.numLibraries); _libraries = new gkLibrary [_librariesAlloc]; sprintf(name, "%s/libraries", _storePath); if (AS_UTL_fileExists(name, false, false) == true) { _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); memcpy(_libraries, _librariesMMap->get(0), sizeof(gkLibrary) * (_info.numLibraries + 1)); delete _librariesMMap; _librariesMMap = NULL;; } _readsAlloc = MAX(128, 2 * _info.numReads); _reads = new gkRead [_readsAlloc]; sprintf(name, "%s/reads", _storePath); if (AS_UTL_fileExists(name, false, false) == true) { _readsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); memcpy(_reads, _readsMMap->get(0), sizeof(gkRead) * (_info.numReads + 1)); delete _readsMMap; _readsMMap = NULL; } sprintf(name, "%s/blobs", _storePath); _blobsMMap = NULL; _blobs = NULL; errno = 0; _blobsFile = fopen(name, "a+"); if (errno) fprintf(stderr, "gkStore()-- Failed to open blobs file '%s' for appending: %s\n", name, strerror(errno)), exit(1); } // // PARTITIONED, no modifications, no appends // // BIG QUESTION: do we want to partition the read metadata too, or is it small enough // to load in every job? For now, we load all the metadata. 
else if ((mode == gkStore_readOnly) && (partID != UINT32_MAX)) { //fprintf(stderr, "gkStore()-- opening '%s' partition '%u' for read-only access.\n", _storePath, partID); // For partitioned reads, we need to have a uint32 map of readID to partitionReadID so we can // lookup the metadata in the partitoned _reads data. This is 4 bytes per read, compared to 24 // bytes for the full meta data. Assuming 100x of 3kb read coverage on human, that's 100 // million reads, so 0.400 GB vs 2.4 GB. sprintf(name, "%s/partitions/map", _storePath); errno = 0; FILE *F = fopen(name, "r"); if (errno) fprintf(stderr, "gkStore::gkStore()-- failed to open '%s' for reading: %s\n", name, strerror(errno)), exit(1); AS_UTL_safeRead(F, &_numberOfPartitions, "gkStore::_numberOfPartitions", sizeof(uint32), 1); _partitionID = partID; _readsPerPartition = new uint32 [_numberOfPartitions + 1]; // No zeroth element in any of these _readIDtoPartitionID = new uint32 [gkStore_getNumReads() + 1]; _readIDtoPartitionIdx = new uint32 [gkStore_getNumReads() + 1]; AS_UTL_safeRead(F, _readsPerPartition, "gkStore::_readsPerPartition", sizeof(uint32), _numberOfPartitions + 1); AS_UTL_safeRead(F, _readIDtoPartitionID, "gkStore::_readIDtoPartitionID", sizeof(uint32), gkStore_getNumReads() + 1); AS_UTL_safeRead(F, _readIDtoPartitionIdx, "gkStore::_readIDtoPartitionIdx", sizeof(uint32), gkStore_getNumReads() + 1); fclose(F); sprintf(name, "%s/libraries", _storePath); _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _libraries = (gkLibrary *)_librariesMMap->get(0); //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _libraries); sprintf(name, "%s/partitions/reads.%04"F_U32P"", _storePath, partID); _readsMMap = new memoryMappedFile (name, memoryMappedFile_readOnly); _reads = (gkRead *)_readsMMap->get(0); //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _reads); sprintf(name, "%s/partitions/blobs.%04"F_U32P"", _storePath, partID); _blobsMMap = new memoryMappedFile (name, 
memoryMappedFile_readOnly); _blobs = (void *)_blobsMMap->get(0); //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _blobs); } // Info only, no access to reads or libraries. else if (mode == gkStore_infoOnly) { //fprintf(stderr, "gkStore()-- opening '%s' for info-only access.\n", _storePath); } else { fprintf(stderr, "gkStore::gkStore()-- invalid mode '%s' with partition ID %u.\n", toString(mode), partID); assert(0); } }