Ejemplo n.º 1
0
void
gkRead::gkRead_copyDataToPartition(void *blobs, FILE **partfiles, uint64 *partfileslen, uint32 partID) {

  //  Stash away the location of the partitioned data

  assert(partfileslen[partID] == AS_UTL_ftell(partfiles[partID]));

  //  Figure out where the blob actually is, and make sure that it really is a blob

  uint8  *blob    = (uint8 *)blobs + _mPtr;
  uint32  blobLen = 8 + *((uint32 *)blob + 1);

  assert(blob[0] == 'B');
  assert(blob[1] == 'L');
  assert(blob[2] == 'O');
  assert(blob[3] == 'B');

  //  Write the blob to the partition, update the length of the partition

  AS_UTL_safeWrite(partfiles[partID], blob, "gkRead::gkRead_copyDataToPartition::blob", sizeof(char), blobLen);

  //  Update the read to the new location of the blob in the partitioned data.

  _mPtr = partfileslen[partID];
  _pID  = partID;

  //  And finalize by remembering the length.

  partfileslen[partID] += blobLen;

  assert(partfileslen[partID] == AS_UTL_ftell(partfiles[partID]));

}
Ejemplo n.º 2
0
//  Write nobj objects, each of 'size' bytes, from buffer to file.
//
//  The data is handed to fwrite() in pieces of at most 32 MB (or one object
//  at a time, when a single object is larger than that), looping until all
//  nobj objects have been written.
//
//    file   - stream to write to
//    buffer - first byte of the data
//    desc   - label used in the failure message
//    size   - bytes per object; zero is accepted and writes nothing
//    nobj   - number of objects to write
//
//  If fwrite() sets errno, a diagnostic is printed to stderr and an assert
//  fires.  With VERIFY_WRITE_POSITIONS defined, the stream position after
//  the write is also checked against the expected position.
//
void
AS_UTL_safeWrite(FILE *file, const void *buffer, const char *desc, size_t size, size_t nobj) {

  //  Zero-sized objects carry no data.  Returning here also keeps the chunk
  //  length computation below from dividing by zero.
  if (size == 0)
    return;

  size_t  position = 0;
  size_t  length   = 32 * 1024 * 1024 / size;
  size_t  towrite  = 0;
  size_t  written  = 0;

  //  An object bigger than the 32 MB chunk gives a chunk length of zero, and
  //  the loop below would then never advance.  Write such objects one at a
  //  time instead.
  if (length == 0)
    length = 1;

#ifdef VERIFY_WRITE_POSITIONS
  //  Clear errno first so a stale value isn't mistaken for a failure of the
  //  ftell below.
  errno = 0;
  off_t   expectedposition = AS_UTL_ftell(file) + nobj * size;
  if (errno)
    //  If we return, and errno is set, the stream isn't seekable.
    expectedposition = 0;
#endif

  while (position < nobj) {
    towrite = length;
    if (position + towrite > nobj)
      towrite = nobj - position;

    errno = 0;
    written = fwrite(((char *)buffer) + position * size, size, towrite, file);

    if (errno) {
      fprintf(stderr, "safeWrite()-- Write failure on %s: %s\n", desc, strerror(errno));
      fprintf(stderr, "safeWrite()-- Wanted to write %zu objects (size=%zu), wrote %zu.\n",
              towrite, size, written);
      assert(errno == 0);
    }

    //  Advance by what was actually written; a short write is retried from
    //  where it stopped.
    position += written;
  }

  //  This catches a bizarre bug on FreeBSD (6.1 for sure, 4.10 too, I
  //  think) where we write at the wrong location; see AS_UTL_fseek.
  //
  //  UNFORTUNATELY, you can't ftell() on stdio.
  //
#ifdef VERIFY_WRITE_POSITIONS
  if ((expectedposition > 0) &&
      (AS_UTL_ftell(file) != expectedposition)) {
    fprintf(stderr, "safeWrite()-- EXPECTED " F_OFF_T ", ended up at " F_OFF_T "\n",
            expectedposition, AS_UTL_ftell(file));
    assert(AS_UTL_ftell(file) == expectedposition);
  }
#endif
}
Ejemplo n.º 3
0
//  Move 'stream' to 'offset', interpreted according to 'whence' (as for
//  fseeko()).  A failed seek prints a message to stderr and trips an assert.
//  After a SEEK_SET seek, the resulting position is asserted to equal
//  'offset'.
//
void
AS_UTL_fseek(FILE *stream, off_t offset, int whence) {
    off_t   beginpos = AS_UTL_ftell(stream);

    //  If the stream is already at the correct position, just return.
    //
    //  Unless we're on FreeBSD.  For unknown reasons, FreeBSD fails
    //  updating the gkpStore with mate links.  It seems to misplace the
    //  file pointer, and ends up writing the record to the wrong
    //  location.  ftell() is returning the correct current location,
    //  and so AS_PER_genericStore doesn't seek() and just writes to the
    //  current position.  At the end of the write, we're off by 4096
    //  bytes.
    //
    //  LINK 498318175,1538 <-> 498318174,1537
    //  AS_UTL_fseek()--  seek to 159904 (whence=0); already there
    //  safeWrite()-- write nobj=1x104 = 104 bytes at position 159904
    //  safeWrite()-- wrote nobj=1x104 = 104 bytes position now 164000
    //  safeWrite()-- EXPECTED 160008, ended up at 164000
    //
#if !defined __FreeBSD__ && !defined __osf__ && !defined __APPLE__
    if ((whence == SEEK_SET) && (beginpos == offset)) {
#ifdef DEBUG_SEEK
        //  This isn't terribly informative, and adds a lot of clutter.
        //fprintf(stderr, "AS_UTL_fseek()--  seek to " F_OFF_T " (whence=%d); already there\n", offset, whence);
#endif
        return;
    }
#endif  //  !__FreeBSD__ && !__osf__ && !__APPLE__

    //  Judge success by the return value of fseeko(), not by errno alone:
    //  errno is only meaningful once the call has reported a failure.  It is
    //  still cleared first so the message below never shows a stale error.
    errno = 0;
    int  status = fseeko(stream, offset, whence);
    if (status != 0) {
        fprintf(stderr, "AS_UTL_fseek()--  Failed with %s.\n", strerror(errno));
        assert(status == 0);
    }

#ifdef DEBUG_SEEK
    fprintf(stderr, "AS_UTL_fseek()--  seek to " F_OFF_T " (requested " F_OFF_T ", whence=%d) from " F_OFF_T "\n",
            AS_UTL_ftell(stream), offset, whence, beginpos);
#endif

    //  For an absolute seek we know exactly where we should have landed.
    if (whence == SEEK_SET)
        assert(AS_UTL_ftell(stream) == offset);
}
Ejemplo n.º 4
0
//  Append the encoded blob held in 'data' to the blobs file, and record in
//  'read' where that blob was placed.
//
//  The blobs file must already be open.
//
void
gkStore::gkStore_stashReadData(gkRead *read, gkReadData *data) {

  assert(_blobsFile != NULL);

  //  The blob goes at the current position of the blobs file; remember that
  //  position, and the partition (0 if not partitioned), in the read.

  read->_pID  = _partitionID;
  read->_mPtr = AS_UTL_ftell(_blobsFile);

  AS_UTL_safeWrite(_blobsFile, data->_blob, "gkStore_stashReadData::blob", sizeof(char), data->_blobLen);
}
Ejemplo n.º 5
0
//  Return the number of overlaps for each read in the requested range.
//
//    firstFrag - set to the first requested ID
//    lastFrag  - set to the last requested ID
//
//  Returns a new[]-allocated array with one entry per ID in
//  [firstFrag, lastFrag]; the caller must delete [] it.  Returns NULL, and
//  leaves firstFrag and lastFrag untouched, when the requested range is
//  empty.  A short read of the offset file is fatal.
//
//  The position of the offset file is restored before returning.
//
uint32 *
ovStore::numOverlapsPerFrag(uint32 &firstFrag, uint32 &lastFrag) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(NULL);

  firstFrag = _firstIIDrequested;
  lastFrag  = _lastIIDrequested;

  //  File positions are kept as off_t, the type AS_UTL_fseek() takes, so
  //  they are not truncated where size_t is narrower than off_t.

  off_t originalPosition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (off_t)_firstIIDrequested * (off_t)sizeof(ovStoreOfft), SEEK_SET);

  //  Even if we're doing a whole human-size store, this allocation is
  //  (a) temporary and (b) only 512MB.  The only current consumer of
  //  this code is FragCorrectOVL.c, which doesn't run on the whole
  //  human, it runs on ~24 pieces, which cuts this down to < 32MB.

  //  Widen before subtracting so the +1 cannot wrap in a narrower type.
  uint64 len = (uint64)_lastIIDrequested - _firstIIDrequested + 1;

  ovStoreOfft  *offsets = new ovStoreOfft [len];

  uint64 act = AS_UTL_safeRead(_offtFile, offsets, "ovStore::numOverlapsInRange::offsets", sizeof(ovStoreOfft), len);

  if (len != act) {
    fprintf(stderr, "AS_OVS_numOverlapsPerFrag()-- short read on offsets!  Expected len=" F_U64 " read act=" F_U64 "\n", len, act);
    exit(1);
  }

  uint32       *numolap = new uint32 [len];

  for (uint64 i=0; i<len; i++)
    numolap[i] = offsets[i]._numOlaps;

  delete [] offsets;

  AS_UTL_fseek(_offtFile, originalPosition, SEEK_SET);

  return(numolap);
}
Ejemplo n.º 6
0
//  Return the total number of overlaps for all reads in the requested
//  range, or zero when the requested range is empty.  A short read of the
//  offset file is fatal.
//
//  The position of the offset file is restored before returning.
//
uint64
ovStore::numOverlapsInRange(void) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(0);

  //  File positions are kept as off_t, the type AS_UTL_fseek() takes, so
  //  they are not truncated where size_t is narrower than off_t.

  off_t originalposition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (off_t)_firstIIDrequested * (off_t)sizeof(ovStoreOfft), SEEK_SET);

  //  Even if we're doing a whole human-size store, this allocation is
  //  (a) temporary and (b) only 512MB.  The only current consumer of
  //  this code is FragCorrectOVL.c, which doesn't run on the whole
  //  human, it runs on ~24 pieces, which cuts this down to < 32MB.

  //  Widen before subtracting so the +1 cannot wrap in a narrower type.
  uint64        len     = (uint64)_lastIIDrequested - _firstIIDrequested + 1;
  ovStoreOfft  *offsets = new ovStoreOfft [len];

  if (len != AS_UTL_safeRead(_offtFile, offsets, "AS_OVS_numOverlapsInRange", sizeof(ovStoreOfft), len)) {
    fprintf(stderr, "AS_OVS_numOverlapsInRange()-- short read on offsets!\n");
    exit(1);
  }

  uint64 numolap = 0;

  for (uint64 i=0; i<len; i++)
    numolap += offsets[i]._numOlaps;

  delete [] offsets;

  AS_UTL_fseek(_offtFile, originalposition, SEEK_SET);

  return(numolap);
}
Ejemplo n.º 7
0
int
main(int argc, char **argv) {
    int            msglist[NUM_OF_REC_TYPES + 1];
    FILE          *outfile[NUM_OF_REC_TYPES + 1];
    off_t          count[NUM_OF_REC_TYPES + 1];
    off_t          size[NUM_OF_REC_TYPES + 1];
    int            i;

    for (i=0; i<=NUM_OF_REC_TYPES; i++) {
        msglist[i] = 0;
        outfile[i] = 0L;
        count[i]   = 0;
        size[i]    = 0;
    }

    int arg = 1;
    int inc = 0;
    int err = 0;
    int msg = 0;

    argc = AS_configure(argc, argv);

    while (arg < argc) {
        if        (strcmp(argv[arg], "-i") == 0) {
            inc = 1;
        } else if (strcmp(argv[arg], "-x") == 0) {
            inc = 0;
        } else if (strcmp(argv[arg], "-o") == 0) {
            errno = 0;
            FILE *F = fopen(argv[++arg], "w");
            if (errno)
                fprintf(stderr, "%s: failed to open output file '%s': %s\n", argv[0], argv[arg], strerror(errno)), exit(1);

            //  Depending on the include flag, we either write all messages
            //  listed in our msglist (or write all message not in the
            //  msglist) to the freshly opened file.
            //
            if (inc) {
                //  Include message i in the output if it was listed
                for (i=1; i<=NUM_OF_REC_TYPES; i++)
                    if ((outfile[i] == NULL) && (msglist[i] > 0))
                        outfile[i] = F;
            } else {
                //  Include message i in the output if it was not listed
                for (i=1; i<=NUM_OF_REC_TYPES; i++)
                    if ((outfile[i] == NULL) && (msglist[i] == 0))
                        outfile[i] = F;
            }

            for (i=0; i<=NUM_OF_REC_TYPES; i++)
                msglist[i] = 0;
        } else if (strcmp(argv[arg], "-m") == 0) {
            int type = GetMessageType(argv[++arg]);
            if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) {
                msglist[type]++;
                msg++;
            } else {
                fprintf(stderr, "%s: invalid message type '%s'.\n", argv[0], argv[arg]);
                err = 1;
            }
        } else if (strcmp(argv[arg], "-h") == 0) {
            err = 1;
        } else {
            int type = GetMessageType(argv[arg]);
            if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) {
                msglist[type]++;
                msg++;
            } else {
                fprintf(stderr, "%s: invalid option '%s'.\n", argv[0], argv[arg]);
                err = 1;
            }
        }
        arg++;
    }

    if (err)
        usage(argv[0]), exit(1);

    //  Assume everything else goes to stdout.  We need to obey the inc
    //  flag, still, though.
    //
    if (inc) {
        //  Include message i in the output if it was listed
        for (i=1; i<=NUM_OF_REC_TYPES; i++)
            if ((outfile[i] == NULL) && (msglist[i] > 0))
                outfile[i] = stdout;
    } else {
        //  Include message i in the output if it was not listed
        for (i=1; i<=NUM_OF_REC_TYPES; i++)
            if ((outfile[i] == NULL) && (msglist[i] == 0))
                outfile[i] = stdout;
    }

    GenericMesg   *pmesg;
    off_t          currPos = 0;
    off_t          prevPos = 0;

    while (ReadProtoMesg_AS(stdin, &pmesg) != EOF) {
        assert(pmesg->t <= NUM_OF_REC_TYPES);

        currPos = AS_UTL_ftell(stdin);

        if (outfile[pmesg->t] != NULL) {
            count[pmesg->t]++;

            size[pmesg->t] += currPos - prevPos;

            WriteProtoMesg_AS(outfile[pmesg->t], pmesg);
        }

        prevPos = currPos;
    }

    for (i=0; i<=NUM_OF_REC_TYPES; i++)
        if (count[i] > 0)
            fprintf(stderr, "%s num "F_OFF_T" size "F_OFF_T" avg %f\n",
                    MessageTypeName[i], count[i], size[i], (double)size[i] / count[i]);

    exit(0);
}