void gkRead::gkRead_copyDataToPartition(void *blobs, FILE **partfiles, uint64 *partfileslen, uint32 partID) { // Stash away the location of the partitioned data assert(partfileslen[partID] == AS_UTL_ftell(partfiles[partID])); // Figure out where the blob actually is, and make sure that it really is a blob uint8 *blob = (uint8 *)blobs + _mPtr; uint32 blobLen = 8 + *((uint32 *)blob + 1); assert(blob[0] == 'B'); assert(blob[1] == 'L'); assert(blob[2] == 'O'); assert(blob[3] == 'B'); // Write the blob to the partition, update the length of the partition AS_UTL_safeWrite(partfiles[partID], blob, "gkRead::gkRead_copyDataToPartition::blob", sizeof(char), blobLen); // Update the read to the new location of the blob in the partitioned data. _mPtr = partfileslen[partID]; _pID = partID; // And finalize by remembering the length. partfileslen[partID] += blobLen; assert(partfileslen[partID] == AS_UTL_ftell(partfiles[partID])); }
void AS_UTL_safeWrite(FILE *file, const void *buffer, const char *desc, size_t size, size_t nobj) { size_t position = 0; size_t length = 32 * 1024 * 1024 / size; size_t towrite = 0; size_t written = 0; #ifdef VERIFY_WRITE_POSITIONS off_t expectedposition = AS_UTL_ftell(file) + nobj * size; if (errno) // If we return, and errno is set, the stream isn't seekable. expectedposition = 0; #endif while (position < nobj) { towrite = length; if (position + towrite > nobj) towrite = nobj - position; errno = 0; written = fwrite(((char *)buffer) + position * size, size, towrite, file); if (errno) { fprintf(stderr, "safeWrite()-- Write failure on %s: %s\n", desc, strerror(errno)); fprintf(stderr, "safeWrite()-- Wanted to write "F_SIZE_T" objects (size="F_SIZE_T"), wrote "F_SIZE_T".\n", towrite, size, written); assert(errno == 0); } position += written; } // This catches a bizarre bug on FreeBSD (6.1 for sure, 4.10 too, I // think) where we write at the wrong location; see fseek below. // // UNFORTUNATELY, you can't ftell() on stdio. // #ifdef VERIFY_WRITE_POSITIONS if ((expectedposition > 0) && (AS_UTL_ftell(file) != expectedposition)) { fprintf(stderr, "safeWrite()-- EXPECTED "F_OFF_T", ended up at "F_OFF_T"\n", expectedposition, AS_UTL_ftell(file)); assert(AS_UTL_ftell(file) == expectedposition); } #endif }
//  Reposition 'stream' with fseeko(), treating any failure as fatal.
//
//    stream  - stream to reposition
//    offset  - offset passed to fseeko()
//    whence  - SEEK_SET, SEEK_CUR or SEEK_END, passed to fseeko()
//
//  A failed seek is reported on stderr and then trips an assert.  After a
//  SEEK_SET, the resulting position is asserted to equal 'offset'.
//
void
AS_UTL_fseek(FILE *stream, off_t offset, int whence) {
  off_t   beginpos = AS_UTL_ftell(stream);

  //  If the stream is already at the correct position, just return.
  //
  //  Unless we're on FreeBSD.  For unknown reasons, FreeBSD fails
  //  updating the gkpStore with mate links.  It seems to misplace the
  //  file pointer, and ends up writing the record to the wrong
  //  location.  ftell() is returning the correct current location,
  //  and so AS_PER_genericStore doesn't seek() and just writes to the
  //  current position.  At the end of the write, we're off by 4096
  //  bytes.
  //
  //  LINK 498318175,1538 <-> 498318174,1537
  //  AS_UTL_fseek()-- seek to 159904 (whence=0); already there
  //  safeWrite()-- write nobj=1x104 = 104 bytes at position 159904
  //  safeWrite()-- wrote nobj=1x104 = 104 bytes position now 164000
  //  safeWrite()-- EXPECTED 160008, ended up at 164000
  //
#if !defined __FreeBSD__ && !defined __osf__ && !defined __APPLE__
  if ((whence == SEEK_SET) && (beginpos == offset)) {
    //  Deliberately silent, even with DEBUG_SEEK: reporting every no-op
    //  seek isn't terribly informative, and adds a lot of clutter.
    return;
  }
#endif  //  __FreeBSD__

  //  fseeko() signals failure through its return value.  errno is cleared
  //  first only so that strerror() below describes this call; a successful
  //  call may leave errno modified, so it is not used to detect failure.

  errno = 0;
  int  seekStatus = fseeko(stream, offset, whence);

  if (seekStatus != 0) {
    fprintf(stderr, "AS_UTL_fseek()-- Failed with %s.\n", strerror(errno));
    assert(seekStatus == 0);
  }

#ifdef DEBUG_SEEK
  fprintf(stderr, "AS_UTL_fseek()-- seek to " F_OFF_T " (requested " F_OFF_T ", whence=%d) from " F_OFF_T "\n",
          AS_UTL_ftell(stream), offset, whence, beginpos);
#endif

  if (whence == SEEK_SET)
    assert(AS_UTL_ftell(stream) == offset);
}
// Dump a block of encoded data to disk, then update the gkRead to point to it. // void gkStore::gkStore_stashReadData(gkRead *read, gkReadData *data) { assert(_blobsFile != NULL); read->_mPtr = AS_UTL_ftell(_blobsFile); read->_pID = _partitionID; // 0 if not partitioned //fprintf(stderr, "STASH read %u at position "F_SIZE_T"\n", read->gkRead_readID(), AS_UTL_ftell(_blobsFile)); AS_UTL_safeWrite(_blobsFile, data->_blob, "gkStore_stashReadData::blob", sizeof(char), data->_blobLen); }
//  Report the number of overlaps stored for each read in the requested
//  IID range.
//
//    firstFrag - set to the first IID of the requested range
//    lastFrag  - set to the last IID of the requested range
//
//  Returns a new[]-allocated array with one entry per IID in the range,
//  in IID order; the caller must delete [] it.  Returns NULL, leaving
//  firstFrag and lastFrag untouched, if the requested range is empty.
//  A short read of the offset file is fatal: a message is printed and the
//  process exits.
//
//  The offset file is left at the position it had on entry.
//
uint32 *
ovStore::numOverlapsPerFrag(uint32 &firstFrag, uint32 &lastFrag) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(NULL);

  firstFrag = _firstIIDrequested;
  lastFrag  = _lastIIDrequested;

  //  File positions are kept as off_t, the type the seek routine takes.
  //  A size_t can be narrower than off_t, which would truncate positions
  //  in a large offset file.

  off_t   originalPosition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (off_t)_firstIIDrequested * (off_t)sizeof(ovStoreOfft), SEEK_SET);

  //  Even if we're doing a whole human-size store, this allocation is
  //  (a) temporary and (b) only 512MB.  The only current consumer of
  //  this code is FragCorrectOVL.c, which doesn't run on the whole
  //  human, it runs on ~24 pieces, which cuts this down to < 32MB.

  uint64        len     = _lastIIDrequested - _firstIIDrequested + 1;

  ovStoreOfft  *offsets = new ovStoreOfft [len];
  uint32       *numolap = new uint32      [len];

  uint64        act     = AS_UTL_safeRead(_offtFile, offsets, "ovStore::numOverlapsPerFrag::offsets", sizeof(ovStoreOfft), len);

  if (len != act) {
    fprintf(stderr, "AS_OVS_numOverlapsPerFrag()-- short read on offsets! Expected len=" F_U64 " read act=" F_U64 "\n", len, act);
    exit(1);
  }

  for (uint64 i=0; i<len; i++)
    numolap[i] = offsets[i]._numOlaps;

  delete [] offsets;

  AS_UTL_fseek(_offtFile, originalPosition, SEEK_SET);

  return(numolap);
}
//  Return the total number of overlaps stored for the reads in the
//  requested IID range, or zero if the requested range is empty.
//
//  A short read of the offset file is fatal: a message is printed and the
//  process exits.  The offset file is left at the position it had on entry.
//
uint64
ovStore::numOverlapsInRange(void) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(0);

  //  File positions are kept as off_t, the type the seek routine takes.
  //  A size_t can be narrower than off_t, which would truncate positions
  //  in a large offset file.

  off_t   originalposition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (off_t)_firstIIDrequested * (off_t)sizeof(ovStoreOfft), SEEK_SET);

  //  Even if we're doing a whole human-size store, this allocation is
  //  (a) temporary and (b) only 512MB.  The only current consumer of
  //  this code is FragCorrectOVL.c, which doesn't run on the whole
  //  human, it runs on ~24 pieces, which cuts this down to < 32MB.

  uint64        len     = _lastIIDrequested - _firstIIDrequested + 1;
  ovStoreOfft  *offsets = new ovStoreOfft [len];

  if (len != AS_UTL_safeRead(_offtFile, offsets, "AS_OVS_numOverlapsInRange", sizeof(ovStoreOfft), len)) {
    fprintf(stderr, "AS_OVS_numOverlapsInRange()-- short read on offsets!\n");
    exit(1);
  }

  uint64  numolap = 0;

  for (uint64 i=0; i<len; i++)
    numolap += offsets[i]._numOlaps;

  delete [] offsets;

  AS_UTL_fseek(_offtFile, originalposition, SEEK_SET);

  return(numolap);
}
int main(int argc, char **argv) { int msglist[NUM_OF_REC_TYPES + 1]; FILE *outfile[NUM_OF_REC_TYPES + 1]; off_t count[NUM_OF_REC_TYPES + 1]; off_t size[NUM_OF_REC_TYPES + 1]; int i; for (i=0; i<=NUM_OF_REC_TYPES; i++) { msglist[i] = 0; outfile[i] = 0L; count[i] = 0; size[i] = 0; } int arg = 1; int inc = 0; int err = 0; int msg = 0; argc = AS_configure(argc, argv); while (arg < argc) { if (strcmp(argv[arg], "-i") == 0) { inc = 1; } else if (strcmp(argv[arg], "-x") == 0) { inc = 0; } else if (strcmp(argv[arg], "-o") == 0) { errno = 0; FILE *F = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "%s: failed to open output file '%s': %s\n", argv[0], argv[arg], strerror(errno)), exit(1); // Depending on the include flag, we either write all messages // listed in our msglist (or write all message not in the // msglist) to the freshly opened file. // if (inc) { // Include message i in the output if it was listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] > 0)) outfile[i] = F; } else { // Include message i in the output if it was not listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] == 0)) outfile[i] = F; } for (i=0; i<=NUM_OF_REC_TYPES; i++) msglist[i] = 0; } else if (strcmp(argv[arg], "-m") == 0) { int type = GetMessageType(argv[++arg]); if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) { msglist[type]++; msg++; } else { fprintf(stderr, "%s: invalid message type '%s'.\n", argv[0], argv[arg]); err = 1; } } else if (strcmp(argv[arg], "-h") == 0) { err = 1; } else { int type = GetMessageType(argv[arg]); if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) { msglist[type]++; msg++; } else { fprintf(stderr, "%s: invalid option '%s'.\n", argv[0], argv[arg]); err = 1; } } arg++; } if (err) usage(argv[0]), exit(1); // Assume everything else goes to stdout. We need to obey the inc // flag, still, though. 
// if (inc) { // Include message i in the output if it was listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] > 0)) outfile[i] = stdout; } else { // Include message i in the output if it was not listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] == 0)) outfile[i] = stdout; } GenericMesg *pmesg; off_t currPos = 0; off_t prevPos = 0; while (ReadProtoMesg_AS(stdin, &pmesg) != EOF) { assert(pmesg->t <= NUM_OF_REC_TYPES); currPos = AS_UTL_ftell(stdin); if (outfile[pmesg->t] != NULL) { count[pmesg->t]++; size[pmesg->t] += currPos - prevPos; WriteProtoMesg_AS(outfile[pmesg->t], pmesg); } prevPos = currPos; } for (i=0; i<=NUM_OF_REC_TYPES; i++) if (count[i] > 0) fprintf(stderr, "%s num "F_OFF_T" size "F_OFF_T" avg %f\n", MessageTypeName[i], count[i], size[i], (double)size[i] / count[i]); exit(0); }