Beispiel #1
0
void
ovStore::ovStore_write(void) {
  AS_UTL_mkdir(_storePath);

  char name[FILENAME_MAX];

  sprintf(name, "%s/info", _storePath);

  //  If the ovs file exists, AND has a valid magic number, then the store is complete and we should
  //  abort before the valid store is destroyed.

  if (AS_UTL_fileExists(name, false, false)) {
    errno = 0;
    FILE *ovsinfo = fopen(name, "r");
    if (errno) {
      fprintf(stderr, "ERROR: failed to read store metadata from '%s': %s\n", name, strerror(errno));
      exit(1);
    }

    AS_UTL_safeRead(ovsinfo, &_info, "ovStore::ovStore::testinfo", sizeof(ovStoreInfo), 1);

    fclose(ovsinfo);

    if (_info._ovsMagic == ovStoreMagic)
      fprintf(stderr, "ERROR:  overlapStore '%s' is a valid overlap store, will not overwrite.\n",
              _storePath), exit(1);
  }

  //  Create a new incomplete info file.

  errno = 0;
  FILE *ovsinfo = fopen(name, "w");

  if (errno)
    fprintf(stderr, "failed to create overlap store '%s': %s\n", _storePath, strerror(errno)), exit(1);

  AS_UTL_safeWrite(ovsinfo, &_info, "ovStore::ovStore::saveinfo", sizeof(ovStoreInfo), 1);

  fclose(ovsinfo);

  sprintf(name, "%s/index", _storePath);

  errno = 0;
  _offtFile = fopen(name, "w");
  if (errno)
    fprintf(stderr, "AS_OVS_createOverlapStore()-- failed to open offset file '%s': %s\n", name, strerror(errno)), exit(1);

  _overlapsThisFile = 0;
  _currentFileIndex = 0;
  _bof              = NULL;
}
Beispiel #2
0
bool
FragmentInfo::load(const char *prefix) {
  char  name[FILENAME_MAX];

  sprintf(name, "%s.fragmentInfo", prefix);

  errno = 0;
  FILE *file = fopen(name, "r");
  if (errno)
    return(false);

  uint64  magicNumber   = 0;
  uint64  versionNumber = 0;

  AS_UTL_safeRead(file, &magicNumber,    "fragmentInformationMagicNumber",   sizeof(uint64), 1);
  AS_UTL_safeRead(file, &versionNumber,  "fragmentInformationVersionNumber", sizeof(uint64), 1);
  AS_UTL_safeRead(file, &_numFragments,  "fragmentInformationNumFrgs",       sizeof(uint32), 1);
  AS_UTL_safeRead(file, &_numLibraries,  "fragmentInformationNumLibs",       sizeof(uint32), 1);

  if (magicNumber != fiMagicNumber) {
    writeLog("FragmentInfo()-- File '%s' is not a fragment info; cannot load.\n", name);
    fclose(file);
    return(false);
  }
  if (versionNumber != fiVersionNumber) {
    writeLog("FragmentInfo()-- File '%s' is version "F_U64", I can only read version "F_U64"; cannot load.\n",
            name, versionNumber, fiVersionNumber);
    fclose(file);
    return(false);
  }

  writeLog("FragmentInfo()-- Loading fragment information for "F_U32" fragments and "F_U32" libraries from cache '%s'\n",
          _numFragments, _numLibraries, name);

  _fragLength    = new uint32 [_numFragments + 1];
  _libIID        = new uint32 [_numFragments + 1];

  _numFragsInLib = new uint32 [_numLibraries + 1];

  AS_UTL_safeRead(file,  _fragLength,    "fragmentInformationFragLen",      sizeof(uint32), _numFragments + 1);
  AS_UTL_safeRead(file,  _libIID,        "fragmentInformationLibIID",       sizeof(uint32), _numFragments + 1);

  AS_UTL_safeRead(file,  _numFragsInLib, "fragmentInformationNumFrgsInLib", sizeof(uint32), _numLibraries + 1);

  fclose(file);

  return(true);
}
Beispiel #3
0
//  Restrict subsequent reads to overlaps whose a_iid is in [firstIID, lastIID].
//
//  The requested range is clamped against _info._largestIID, the index file is positioned at
//  the record for firstIID, and (if that record can be read) the overlap data file named by
//  the record is opened and positioned at the record's offset.
//
//  If the range ends up invalid (firstIID > lastIID), or the index record cannot be read,
//  nothing is reported here; the function just returns and leaves it to the read functions.
//
void
ovStore::setRange(uint32 firstIID, uint32 lastIID) {
  char            name[FILENAME_MAX];

  //  The index has one record per read iid, so the record for an iid is found by seeking.

  if (firstIID > _info._largestIID)
    firstIID = _info._largestIID + 1;
  if (lastIID >= _info._largestIID)
    lastIID = _info._largestIID;

  AS_UTL_fseek(_offtFile, (size_t)firstIID * sizeof(ovStoreOfft), SEEK_SET);

  //  Record the request before attempting the read, so the range is set even when the
  //  read below fails.

  _offt.clear();

  _firstIIDrequested = firstIID;
  _lastIIDrequested  = lastIID;

  //  The index record must actually be read to learn where the overlaps start.

  if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::setRange::offset", sizeof(ovStoreOfft), 1))
    return;

  _overlapsThisFile = 0;
  _currentFileIndex = _offt._fileno;

  //  Replace whatever data file was open.  Null the pointer so it never dangles should the
  //  allocation below fail.

  delete _bof;
  _bof = NULL;

  snprintf(name, sizeof(name), "%s/%04d", _storePath, _currentFileIndex);
  _bof = new ovFile(name, ovFileNormal);

  _bof->seekOverlap(_offt._offset);
}
Beispiel #4
0
//  Read the next overlap in the requested range into *overlap.
//
//  Returns 1 if an overlap was loaded, 0 if the index is exhausted or the next index record
//  is past _lastIIDrequested.  The loaded overlap gets its a_iid from the current index
//  record, its 'g' pointer set to _gkp, and, when _evalues is present, its evalue taken from
//  _evalues at the record's running _overlapID.
//
uint32
ovStore::readOverlap(ovOverlap *overlap) {

  assert(_isOutput == FALSE);

  //  If the current a_iid has no overlaps left, advance through the index until a record
  //  with overlaps is found.  Failing to read a record means there is nothing more.

  while (_offt._numOlaps == 0)
    if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::readOverlap::offset",
                             sizeof(ovStoreOfft), 1))
      return(0);

  //  Stop once we've left the requested range.

  if (_offt._a_iid > _lastIIDrequested)
    return(0);

  //  Read an overlap; when the current data file has none left (or none is open), move on
  //  to the next numbered file and try again.
  //
  //  NOTE(review): unlike readOverlaps(), this loop does not stop at
  //  _info._highestFileIndex; it relies on the index and data files agreeing.

  while ((_bof == NULL) ||
         (_bof->readOverlap(overlap) == FALSE)) {
    char name[FILENAME_MAX];

    delete _bof;    //  deleting a null pointer is a no-op
    _bof = NULL;

    _currentFileIndex++;

    snprintf(name, sizeof(name), "%s/%04d", _storePath, _currentFileIndex);
    _bof = new ovFile(name, ovFileNormal);
  }

  overlap->a_iid = _offt._a_iid;
  overlap->g     = _gkp;

  if (_evalues)
    overlap->evalue(_evalues[_offt._overlapID++]);

  _offt._numOlaps--;

  return(1);
}
Beispiel #5
0
//  Return the number of overlaps for each read in the currently requested range.
//
//  On success firstFrag and lastFrag are set to the requested range and the return value is
//  a new[]-allocated array with one count per read, index 0 corresponding to firstFrag.
//  This function does not free the array.  Returns NULL (leaving firstFrag and lastFrag
//  untouched) if the requested range is empty.  A short read of the index is fatal.
//
//  The position of the index file is saved and restored, so sequential reading is unaffected.
//
uint32 *
ovStore::numOverlapsPerFrag(uint32 &firstFrag, uint32 &lastFrag) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(NULL);

  firstFrag = _firstIIDrequested;
  lastFrag  = _lastIIDrequested;

  size_t originalPosition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (size_t)_firstIIDrequested * sizeof(ovStoreOfft), SEEK_SET);

  //  All index records for the range are loaded at once into a temporary buffer.

  uint64 len = _lastIIDrequested - _firstIIDrequested + 1;

  ovStoreOfft  *offsets = new ovStoreOfft [len];
  uint32       *numolap = new uint32      [len];

  uint64 act = AS_UTL_safeRead(_offtFile, offsets, "ovStore::numOverlapsInRange::offsets", sizeof(ovStoreOfft), len);

  if (len != act) {
    fprintf(stderr, "AS_OVS_numOverlapsPerFrag()-- short read on offsets!  Expected len=" F_U64 " read act=" F_U64 "\n", len, act);
    exit(1);
  }

  for (uint64 i=0; i<len; i++)
    numolap[i] = offsets[i]._numOlaps;

  delete [] offsets;

  AS_UTL_fseek(_offtFile, originalPosition, SEEK_SET);

  return(numolap);
}
Beispiel #6
0
//  Return the total number of overlaps for all reads in the currently requested range,
//  or zero if that range is empty.  A short read of the index is fatal.
//
//  The position of the index file is saved and restored around the scan.
//
uint64
ovStore::numOverlapsInRange(void) {

  if (_firstIIDrequested > _lastIIDrequested)
    return(0);

  size_t  savedPosition = AS_UTL_ftell(_offtFile);

  AS_UTL_fseek(_offtFile, (size_t)_firstIIDrequested * sizeof(ovStoreOfft), SEEK_SET);

  //  Pull every index record of the range into a temporary buffer in one read.

  uint64        nRecords = _lastIIDrequested - _firstIIDrequested + 1;
  ovStoreOfft  *records  = new ovStoreOfft [nRecords];

  uint64        nLoaded  = AS_UTL_safeRead(_offtFile, records, "AS_OVS_numOverlapsInRange", sizeof(ovStoreOfft), nRecords);

  if (nRecords != nLoaded) {
    fprintf(stderr, "AS_OVS_numOverlapsInRange()-- short read on offsets!\n");
    exit(1);
  }

  //  Sum the per-read counts.

  uint64  total = 0;

  for (uint64 r=0; r<nRecords; r++)
    total += records[r]._numOlaps;

  delete [] records;

  AS_UTL_fseek(_offtFile, savedPosition, SEEK_SET);

  return(total);
}
Beispiel #7
0
//  Read a batch of overlaps into overlaps[], returning how many were stored.
//
//    overlaps       destination array; if NULL this is only a query (see below)
//    maxOverlaps    capacity of overlaps[]; if zero this is only a query
//    restrictToIID  true:  load only the overlaps of the current a_iid
//                   false: keep loading across a_iids until the array is full, the index is
//                          exhausted, or the next a_iid is past _lastIIDrequested
//
//  In query form (overlaps == NULL or maxOverlaps == 0) nothing is loaded and the number of
//  overlaps remaining for the current a_iid is returned.
//
//  Returns 0 when there is nothing more in the requested range.  The overlaps of the
//  current a_iid must fit in the array (asserted).
//
uint32
ovStore::readOverlaps(ovOverlap *overlaps, uint32 maxOverlaps, bool restrictToIID) {
  uint32  numOvl = 0;    //  unsigned, to match maxOverlaps and the return type

  assert(_isOutput == FALSE);

  //  If the current a_iid has no overlaps left, advance through the index until a record
  //  with overlaps is found.  Failing to read a record means there is nothing more.

  while (_offt._numOlaps == 0)
    if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::readOverlaps::offset", sizeof(ovStoreOfft), 1))
      return(0);

  //  Stop once we've left the requested range.

  if (_offt._a_iid > _lastIIDrequested)
    return(0);

  //  Just a query?  Report how many overlaps the caller would need room for.

  if ((overlaps == NULL) || (maxOverlaps == 0))
    return(_offt._numOlaps);

  assert(_offt._numOlaps <= maxOverlaps);

  //  Load while the current a_iid has overlaps left and, unless restricted to one a_iid
  //  (which the assert above guarantees will fit), while there is room in the array.

  while ((_offt._numOlaps > 0) &&
         ((restrictToIID == true) || (numOvl < maxOverlaps))) {

    //  Read an overlap.  If that fails, open the next data file and read from there.

    while ((_bof == NULL) ||
           (_bof->readOverlap(overlaps + numOvl) == false)) {
      char name[FILENAME_MAX];

      //  Null the pointer after deleting it: if we run out of files below, a stale pointer
      //  would otherwise be dereferenced (and later deleted again).

      delete _bof;
      _bof = NULL;

      _currentFileIndex++;

      if (_currentFileIndex > _info._highestFileIndex)
        break;    //  No more files, stop trying to load an overlap.

      snprintf(name, sizeof(name), "%s/%04d", _storePath, _currentFileIndex);
      _bof = new ovFile(name, ovFileNormal);
    }

    //  Out of data files: no overlap was loaded and none ever will be, so stop.  Merely
    //  skipping the bookkeeping would leave _numOlaps > 0 and, with restrictToIID set,
    //  spin forever.

    if (_currentFileIndex > _info._highestFileIndex)
      break;

    overlaps[numOvl].a_iid = _offt._a_iid;
    overlaps[numOvl].g     = _gkp;

    if (_evalues)
      overlaps[numOvl].evalue(_evalues[_offt._overlapID++]);

    numOvl++;

    assert(_offt._numOlaps > 0);

    _offt._numOlaps--;

    //  When loading across a_iids, move to the next index record that has overlaps.  If the
    //  index is exhausted, _numOlaps stays zero and the main loop terminates; if the next
    //  a_iid is past the requested range, stop here.

    if (restrictToIID == false) {
      while (_offt._numOlaps == 0)
        if (0 == AS_UTL_safeRead(_offtFile, &_offt, "ovStore::readOverlap::offset", sizeof(ovStoreOfft), 1))
          break;

      if (_offt._a_iid > _lastIIDrequested)
        break;
    }
  }

  assert(numOvl <= maxOverlaps);

  return(numOvl);
}
Beispiel #8
0
void
mergeInfoFiles(char       *storePath,
               uint32      nPieces) {
  ovStoreInfo    infopiece;
  ovStoreInfo    info;

  info._ovsMagic              = ovStoreMagic;
	info._ovsVersion            = ovStoreVersion;
  info._smallestIID           = UINT64_MAX;
  info._largestIID            = 0;
  info._numOverlapsTotal      = 0;
  info._highestFileIndex      = nPieces;
	info._maxReadLenInBits      = AS_MAX_READLEN_BITS;

  ovStoreOfft offm;

  offm._a_iid     = 0;
  offm._fileno    = 1;
  offm._offset    = 0;
  offm._numOlaps  = 0;

  //  Open the new master index output file

  char            name[FILENAME_MAX];

  sprintf(name, "%s/index", storePath);

  errno = 0;
  FILE  *idx = fopen(name, "w");
  if (errno)
    fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1);

  //  Special case, we need an empty index for the zeroth fragment.

  AS_UTL_safeWrite(idx, &offm, "ovStore::mergeInfoFiles::offsetZero", sizeof(ovStoreOfft), 1);

  //  Process each

  for (uint32 i=1; i<=nPieces; i++) {
    sprintf(name, "%s/%04d.info", storePath, i);

    fprintf(stderr, "Processing '%s'\n", name);

    if (AS_UTL_fileExists(name, FALSE, FALSE) == false) {
      fprintf(stderr, "ERROR: file '%s' not found.\n", name);
      exit(1);
    }

    {
      errno = 0;
      FILE *F = fopen(name, "r");
      if (errno)
        fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1);
      AS_UTL_safeRead(F, &infopiece, "ovStore::mergeInfoFiles::infopiece", sizeof(ovStoreInfo), 1);
      fclose(F);
    }

    //  Add empty index elements for missing overlaps

    if (infopiece._numOverlapsTotal == 0) {
      fprintf(stderr, "  No overlaps found.\n");
      continue;
    }

    assert(infopiece._smallestIID <= infopiece._largestIID);

    if (info._largestIID + 1 < infopiece._smallestIID)
      fprintf(stderr, "  Adding empty records for fragments "F_U64" to "F_U64"\n",
              info._largestIID + 1, infopiece._smallestIID - 1);

    while (info._largestIID + 1 < infopiece._smallestIID) {
      offm._a_iid     = info._largestIID + 1;
      //offm._fileno    = set elsewhere
      //offm._offset    = set elsewhere
      //offm._numOlaps  = 0;

      AS_UTL_safeWrite(idx, &offm, "ovStore::mergeInfoFiles::offsets", sizeof(ovStoreOfft), 1);

      info._largestIID++;
    }

    //  Copy index elements for existing overlaps.  While copying, update the supposed position
    //  of any fragments with no overlaps.  Without doing this, accessing the store beginning
    //  or ending at such a fragment will fail.

    {
      sprintf(name, "%s/%04d.index", storePath, i);

      errno = 0;
      FILE  *F = fopen(name, "r");
      if (errno)
        fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1);

      uint32          recsLen = 0;
      uint32          recsMax = 1024 * 1024;
      ovStoreOfft    *recs    = new ovStoreOfft [recsMax];

      recsLen = AS_UTL_safeRead(F, recs, "ovStore::mergeInfoFiles::offsetsLoad", sizeof(ovStoreOfft), recsMax);

      if (recsLen > 0) {
        if (info._largestIID + 1 != recs[0]._a_iid)
          fprintf(stderr, "ERROR: '%s' starts with iid "F_U32", but store only up to "F_U64"\n",
                  name, recs[0]._a_iid, info._largestIID);
        assert(info._largestIID + 1 == recs[0]._a_iid);
      }

      while (recsLen > 0) {
        offm._fileno = recs[recsLen-1]._fileno;  //  Update location of missing stuff.
        offm._offset = recs[recsLen-1]._offset;

				AS_UTL_safeWrite(idx, recs, "ovStore::mergeInfoFiles::offsetsWrite", sizeof(ovStoreOfft), recsLen);

        recsLen = AS_UTL_safeRead(F, recs, "ovStore::mergeInfoFiles::offsetsReLoad", sizeof(ovStoreOfft), recsMax);
      }

      delete [] recs;

      fclose(F);
    }

    //  Update

    info._smallestIID = MIN(info._smallestIID, infopiece._smallestIID);
    info._largestIID  = MAX(info._largestIID,  infopiece._largestIID);

    info._numOverlapsTotal += infopiece._numOverlapsTotal;

    fprintf(stderr, "  Now finished with fragments "F_U64" to "F_U64" -- "F_U64" overlaps.\n",
            info._smallestIID, info._largestIID, info._numOverlapsTotal);
  }

  fclose(idx);


  //  Dump the new store info file

  {
    sprintf(name, "%s/info", storePath);

    errno = 0;
    FILE  *F = fopen(name, "w");
    if (errno)
      fprintf(stderr, "ERROR: Failed to open '%s': %s\n", name, strerror(errno)), exit(1);

    AS_UTL_safeWrite(F, &info, "ovStore::mergeInfoFiles::finalInfo", sizeof(ovStoreInfo), 1);

    fclose(F);
  }

  fprintf(stderr, "\n");
  fprintf(stderr, "Index finalized for reads "F_U64" to "F_U64" with "F_U64" overlaps.\n",
          info._smallestIID,
          info._largestIID,
          info._numOverlapsTotal);
}
Beispiel #9
0
void
ovStore::ovStore_read(void) {
  char  name[FILENAME_MAX];

  sprintf(name, "%s/info", _storePath);
  errno = 0;
  FILE *ovsinfo = fopen(name, "r");
  if (errno)
    fprintf(stderr, "ERROR: directory '%s' is not an ovelrapStore; failed to open info file '%s': %s\n",
            _storePath, name, strerror(errno)), exit(1);

  AS_UTL_safeRead(ovsinfo, &_info, "ovStore::ovStore::info", sizeof(ovStoreInfo), 1);

  fclose(ovsinfo);

  if ((_info._ovsMagic != ovStoreMagic) && (_info._ovsMagic != ovStoreMagicIncomplete))
    fprintf(stderr, "ERROR:  directory '%s' is not an overlapStore; magic number 0x%016"F_X64P" incorrect.\n",
            _storePath, _info._ovsMagic), exit(1);

  if ((_info._ovsMagic != ovStoreMagic) && (_info._ovsMagic != ovStoreMagicIncomplete))
    fprintf(stderr, "ERROR:  overlapStore '%s' is incomplate; creation crashed?\n",
            _storePath), exit(1);

  if (_info._ovsVersion != ovStoreVersion)
    fprintf(stderr, "ERROR:  overlapStore '%s' is version "F_U64"; this code supports only version "F_U64".\n",
            _storePath, _info._ovsVersion, ovStoreVersion), exit(1);

  if (_info._maxReadLenInBits != AS_MAX_READLEN_BITS)
    fprintf(stderr, "ERROR:  overlapStore '%s' is for AS_MAX_READLEN_BITS="F_U64"; this code supports only %d bits.\n",
            _storePath, _info._maxReadLenInBits, AS_MAX_READLEN_BITS), exit(1);

  //  Load stats

#if 0
  sprintf(name, "%s/statistics", _storePath);
  errno = 0;
  FILE *ost = fopen(name, "r");
  if (errno)
    fprintf(stderr, "failed to open the stats file '%s': %s\n", name, strerror(errno)), exit(1);
  AS_UTL_safeRead(ost, &_stats, "ovStore::ovStore::stats", sizeof(OverlapStoreStats), 1);
  fclose(ost);
#endif

  //  Open the index

  sprintf(name, "%s/index", _storePath);

  errno = 0;
  _offtFile = fopen(name, "r");
  if (errno)
    fprintf(stderr, "ERROR:  failed to open offset file '%s': %s\n", name, strerror(errno)), exit(1);

  //  Open erates

  sprintf(name, "%s/evalues", _storePath);

  if (AS_UTL_fileExists(name)) {
    _evaluesMap  = new memoryMappedFile(name, memoryMappedFile_readOnly);
    _evalues     = (uint16 *)_evaluesMap->get(0);
  }

  //_offtMMap   = new memoryMappedFile(name, memoryMappedFile_readOnly);
  //_offts      = (ovStoreOfft *)_offtMMap->get(0);
  //_offtLength = _offtMap->length() / sizeof(ovStoreOfft);
}
Beispiel #10
0
bool
testIndex(char *ovlName,
          bool  doFixes) {
  char name[FILENAME_MAX];
  FILE *I = NULL;
  FILE *F = NULL;

  sprintf(name, "%s/index", ovlName);

  errno = 0;
  I = fopen(name, "r");
  if (errno)
    fprintf(stderr, "ERROR: Failed to open '%s' for reading: %s\n", name, strerror(errno)), exit(1);

  //fprintf(stderr, "TESTING '%s'\n", name);

  if (doFixes) {
    sprintf(name, "%s/index.fixed", ovlName);

    errno = 0;
    F = fopen(name, "w");
    if (errno)
      fprintf(stderr, "ERROR: Failed to open '%s' for writing: %s\n", name, strerror(errno)), exit(1);

    //fprintf(stderr, "WITH FIXES TO '%s'\n", name);
  }

  ovStoreOfft  O;

  uint32  curIID = 0;
  uint32  minIID = UINT32_MAX;
  uint32  maxIID = 0;

  uint32  nErrs = 0;

  while (1 == AS_UTL_safeRead(I, &O, "offset", sizeof(ovStoreOfft), 1)) {
    bool  maxIncreases   = (maxIID < O._a_iid);
    bool  errorDecreased = ((O._a_iid < curIID));
    bool  errorGap       = ((O._a_iid > 0) && (curIID + 1 != O._a_iid));

    if (O._a_iid < minIID)
      minIID = O._a_iid;

    if (maxIncreases)
      maxIID = O._a_iid;

    if (errorDecreased)
      fprintf(stderr, "ERROR: index decreased from "F_U32" to "F_U32"\n", curIID, O._a_iid), nErrs++;
    else if (errorGap)
      fprintf(stderr, "ERROR: gap between "F_U32" and "F_U32"\n", curIID, O._a_iid), nErrs++;

    if ((maxIncreases == true) && (errorGap == false)) {
      if (doFixes)
        AS_UTL_safeWrite(F, &O, "offset", sizeof(ovStoreOfft), 1);

    } else if (O._numOlaps > 0) {
      fprintf(stderr, "ERROR: lost overlaps a_iid "F_U32" fileno "F_U32" offset "F_U32" numOlaps "F_U32"\n",
              O._a_iid, O._fileno, O._offset, O._numOlaps);
    }

    curIID = O._a_iid;
  }

  fclose(I);

  if (F)
    fclose(F);

  return(nErrs == 0);
}
bool
BestOverlapGraph::load(const char *prefix, double AS_UTG_ERROR_RATE, double AS_UTG_ERROR_LIMIT) {
  char name[FILENAME_MAX];

  sprintf(name, "%s.bog", prefix);

  errno = 0;
  FILE *file = fopen(name, "r");
  if (errno)
    return(false);

  assert(_best5 != NULL);
  assert(_best3 != NULL);
  assert(_bestC != NULL);

  uint64 magicNumber;
  uint64 versionNumber;

  AS_UTL_safeRead(file, &magicNumber,   "magicnumber",   sizeof(uint64), 1);
  AS_UTL_safeRead(file, &versionNumber, "versionnumber", sizeof(uint64), 1);

  if (magicNumber != ogMagicNumber) {
    fprintf(logFile, "BestOverlapGraph()-- File '%s' is not a best overlap graph; cannot load graph.\n", name);
    fclose(file);
    return(false);
  }
  if (versionNumber != ogVersionNumber) {
    fprintf(logFile, "BestOverlapGraph()-- File '%s' is version "F_U64", I can only read version "F_U64"; cannot load graph.\n",
            name, versionNumber, ogVersionNumber);
    fclose(file);
    return(false);
  }

  fprintf(logFile, "BestOverlapGraph()-- Loading overlap graph from '%s'.\n", name);

  double  eRate  = 0.0;
  double  eLimit = 0.0;

  AS_UTL_safeRead(file, &eRate,  "errorRate",     sizeof(double), 1);
  AS_UTL_safeRead(file, &eLimit, "errorLimit",    sizeof(double), 1);

  if (eRate  != AS_UTG_ERROR_RATE)
    fprintf(logFile, "BestOverlapGraph()-- Saved graph in '%s' has error rate %f, this run is expecting error rate %f; cannot load graph.\n",
            name, eRate, AS_UTG_ERROR_RATE);
  if (eLimit != AS_UTG_ERROR_LIMIT)
    fprintf(logFile, "BestOverlapGraph()-- Saved graph in '%s' has error limit %f, this run is expecting error limit %f; cannot load graph.\n",
            name, eLimit, AS_UTG_ERROR_LIMIT);
  if ((eRate  != AS_UTG_ERROR_RATE) ||
      (eLimit != AS_UTG_ERROR_LIMIT)) {
    fclose(file);
    return(false);
  }

  AS_UTL_safeRead(file, _best5, "best overlaps", sizeof(BestEdgeOverlap), FI->numFragments() + 1);
  AS_UTL_safeRead(file, _best3, "best overlaps", sizeof(BestEdgeOverlap), FI->numFragments() + 1);
  AS_UTL_safeRead(file, _bestC, "best contains", sizeof(BestContainment), FI->numFragments() + 1);

  for (uint32 i=0; i<FI->numFragments() + 1; i++) {
    if (_bestC[i].olapsLen > 0) {
      _bestC[i].olaps = new uint32 [_bestC[i].olapsLen];
      AS_UTL_safeRead(file, _bestC[i].olaps, "best contains olaps", sizeof(uint32), _bestC[i].olapsLen);
    } else {
      assert(_bestC[i].olaps == NULL);
    }
  }

  fclose(file);

  return(true);
}
Beispiel #12
0
//  Open (or, in extend mode, create) the store at 'path'.
//
//  The combination of 'mode' and 'partID' selects how the store is accessed:
//
//    gkStore_readOnly, partID == UINT32_MAX  --  libraries, reads and blobs are memory mapped
//                                                read-only.
//    gkStore_modify,   partID == UINT32_MAX  --  same files, memory mapped read-write.
//    gkStore_extend,   partID == UINT32_MAX  --  the store directory is created if needed;
//                                                libraries and reads are copied into local
//                                                arrays; blobs are opened for appending.
//    gkStore_readOnly, partID != UINT32_MAX  --  the partition map is loaded and the reads and
//                                                blobs of that one partition are memory mapped
//                                                read-only.
//    gkStore_infoOnly                        --  only the info block is loaded.
//
//  Any other combination is reported and asserts.  If '<path>/info' exists it is loaded
//  first; the sizes and limits recorded in _info must then match those compiled into this
//  executable, otherwise the constructor exits.
//
gkStore::gkStore(char const *path, gkStore_mode mode, uint32 partID) {
  char    name[FILENAME_MAX];

  memset(_storePath, 0, sizeof(char) * FILENAME_MAX);
  memset(_storeName, 0, sizeof(char) * FILENAME_MAX);

  //  Bounded copies; the buffers were just zeroed, so copying at most FILENAME_MAX-1 bytes
  //  leaves them terminated.

  strncpy(_storePath, path, FILENAME_MAX - 1);
  strncpy(_storeName, path, FILENAME_MAX - 1);  //  Broken.

  snprintf(name, sizeof(name), "%s/info", _storePath);

  //  If the info file exists, load it.

  if (AS_UTL_fileExists(name, false, false) == true) {
    errno = 0;
    FILE *I = fopen(name, "r");
    if (I == NULL) {
      fprintf(stderr, "gkStore()--  failed to open '%s' for reading: %s\n", name, strerror(errno));
      exit(1);
    }

    AS_UTL_safeRead(I, &_info, "gkStore::_info", sizeof(gkStoreInfo), 1);
    fclose(I);
  }

  //  Check sizes are correct.  fprintf() returns the number of characters printed, so
  //  'failed' is non-zero if any message was emitted.  The sizeof values are cast to match
  //  the %u conversion.

  uint32  failed = 0;

  if (_info.gkLibrarySize      != sizeof(gkLibrary))
    failed += fprintf(stderr, "ERROR:  gkLibrary size in store = %u, differs from executable = %u\n",
                      _info.gkLibrarySize, (uint32)sizeof(gkLibrary));

  if (_info.gkReadSize         != sizeof(gkRead))
    failed += fprintf(stderr, "ERROR:  gkRead size in store = %u, differs from executable = %u\n",
                      _info.gkReadSize, (uint32)sizeof(gkRead));

  if (_info.gkMaxLibrariesBits != AS_MAX_LIBRARIES_BITS)
    failed += fprintf(stderr, "ERROR:  AS_MAX_LIBRARIES_BITS in store = %u, differs from executable = %u\n",
                      _info.gkMaxLibrariesBits, AS_MAX_LIBRARIES_BITS);

  if (_info.gkLibraryNameSize  != LIBRARY_NAME_SIZE)
    failed += fprintf(stderr, "ERROR:  LIBRARY_NAME_SIZE in store = %u, differs from executable = %u\n",
                      _info.gkLibraryNameSize, LIBRARY_NAME_SIZE);

  if (_info.gkMaxReadBits      != AS_MAX_READS_BITS)
    failed += fprintf(stderr, "ERROR:  AS_MAX_READS_BITS in store = %u, differs from executable = %u\n",
                      _info.gkMaxReadBits, AS_MAX_READS_BITS);

  if (_info.gkMaxReadLenBits   != AS_MAX_READLEN_BITS)
    failed += fprintf(stderr, "ERROR:  AS_MAX_READLEN_BITS in store = %u, differs from executable = %u\n",
                      _info.gkMaxReadLenBits, AS_MAX_READLEN_BITS);

  if (failed) {
    fprintf(stderr, "ERROR:\nERROR:  Can't open store '%s': parameters in src/AS_global.H are incompatible with the store.\n", _storePath);
    exit(1);
  }

  assert(_info.gkLibrarySize      == sizeof(gkLibrary));
  assert(_info.gkReadSize         == sizeof(gkRead));

  assert(_info.gkMaxLibrariesBits == AS_MAX_LIBRARIES_BITS);
  assert(_info.gkLibraryNameSize  == LIBRARY_NAME_SIZE);
  assert(_info.gkMaxReadBits      == AS_MAX_READS_BITS);
  assert(_info.gkMaxReadLenBits   == AS_MAX_READLEN_BITS);

  //  Clear ourself, to make valgrind happier.

  _librariesMMap          = NULL;
  _librariesAlloc         = 0;
  _libraries              = NULL;

  _readsMMap              = NULL;
  _readsAlloc             = 0;
  _reads                  = NULL;

  _blobsMMap              = NULL;
  _blobs                  = NULL;
  _blobsFile              = NULL;

  _mode                   = mode;

  _numberOfPartitions     = 0;
  _partitionID            = 0;
  _readIDtoPartitionIdx   = NULL;
  _readIDtoPartitionID    = NULL;
  _readsPerPartition      = NULL;
  //_readsInThisPartition   = NULL;

  //
  //  READ ONLY
  //

  if ((mode == gkStore_readOnly) &&
      (partID == UINT32_MAX)) {
    //fprintf(stderr, "gkStore()--  opening '%s' for read-only access.\n", _storePath);

    if (AS_UTL_fileExists(_storePath, true, false) == false) {
      fprintf(stderr, "gkStore()--  failed to open '%s' for read-only access: store doesn't exist.\n", _storePath);
      exit(1);
    }

    snprintf(name, sizeof(name), "%s/libraries", _storePath);
    _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly);
    _libraries     = (gkLibrary *)_librariesMMap->get(0);

    snprintf(name, sizeof(name), "%s/reads", _storePath);
    _readsMMap     = new memoryMappedFile (name, memoryMappedFile_readOnly);
    _reads         = (gkRead *)_readsMMap->get(0);

    snprintf(name, sizeof(name), "%s/blobs", _storePath);
    _blobsMMap     = new memoryMappedFile (name, memoryMappedFile_readOnly);
    _blobs         = (void *)_blobsMMap->get(0);
  }

  //
  //  MODIFY, NO APPEND (also for building a partitioned store)
  //

  else if ((mode == gkStore_modify) &&
           (partID == UINT32_MAX)) {
    //fprintf(stderr, "gkStore()--  opening '%s' for read-write access.\n", _storePath);

    if (AS_UTL_fileExists(_storePath, true, false) == false) {
      fprintf(stderr, "gkStore()--  failed to open '%s' for read-write access: store doesn't exist.\n", _storePath);
      exit(1);
    }

    snprintf(name, sizeof(name), "%s/libraries", _storePath);
    _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readWrite);
    _libraries     = (gkLibrary *)_librariesMMap->get(0);

    snprintf(name, sizeof(name), "%s/reads", _storePath);
    _readsMMap     = new memoryMappedFile (name, memoryMappedFile_readWrite);
    _reads         = (gkRead *)_readsMMap->get(0);

    snprintf(name, sizeof(name), "%s/blobs", _storePath);
    _blobsMMap     = new memoryMappedFile (name, memoryMappedFile_readWrite);
    _blobs         = (void *)_blobsMMap->get(0);
  }

  //
  //  MODIFY, APPEND, open mmap'd files, but copy them entirely to local memory
  //

  else if ((mode == gkStore_extend) &&
           (partID == UINT32_MAX)) {
    //fprintf(stderr, "gkStore()--  opening '%s' for read-write and append access.\n", _storePath);

    if (AS_UTL_fileExists(_storePath, true, true) == false)
      AS_UTL_mkdir(_storePath);

    //  Allocate room to grow, then copy in whatever libraries already exist.

    _librariesAlloc = MAX(64, 2 * _info.numLibraries);
    _libraries      = new gkLibrary [_librariesAlloc];

    snprintf(name, sizeof(name), "%s/libraries", _storePath);
    if (AS_UTL_fileExists(name, false, false) == true) {
      _librariesMMap  = new memoryMappedFile (name, memoryMappedFile_readOnly);

      memcpy(_libraries, _librariesMMap->get(0), sizeof(gkLibrary) * (_info.numLibraries + 1));

      delete _librariesMMap;
      _librariesMMap = NULL;
    }

    //  Likewise for reads.

    _readsAlloc     = MAX(128, 2 * _info.numReads);
    _reads          = new gkRead [_readsAlloc];

    snprintf(name, sizeof(name), "%s/reads", _storePath);
    if (AS_UTL_fileExists(name, false, false) == true) {
      _readsMMap      = new memoryMappedFile (name, memoryMappedFile_readOnly);

      memcpy(_reads, _readsMMap->get(0), sizeof(gkRead) * (_info.numReads + 1));

      delete _readsMMap;
      _readsMMap = NULL;
    }

    //  Blobs are not loaded; the file is opened so new blobs can be appended.

    snprintf(name, sizeof(name), "%s/blobs", _storePath);

    _blobsMMap     = NULL;
    _blobs         = NULL;

    errno = 0;
    _blobsFile     = fopen(name, "a+");
    if (_blobsFile == NULL) {
      fprintf(stderr, "gkStore()--  Failed to open blobs file '%s' for appending: %s\n",
              name, strerror(errno));
      exit(1);
    }
  }

  //
  //  PARTITIONED, no modifications, no appends
  //
  //  BIG QUESTION: do we want to partition the read metadata too, or is it small enough
  //  to load in every job?  For now, we load all the metadata.

  else if ((mode == gkStore_readOnly) &&
           (partID != UINT32_MAX)) {
    //fprintf(stderr, "gkStore()--  opening '%s' partition '%u' for read-only access.\n", _storePath, partID);

    //  For partitioned reads, we need to have a uint32 map of readID to partitionReadID so we can
    //  lookup the metadata in the partitioned _reads data.  This is 4 bytes per read, compared to 24
    //  bytes for the full meta data.  Assuming 100x of 3kb read coverage on human, that's 100
    //  million reads, so 0.400 GB vs 2.4 GB.

    snprintf(name, sizeof(name), "%s/partitions/map", _storePath);

    errno = 0;
    FILE *F = fopen(name, "r");
    if (F == NULL) {
      fprintf(stderr, "gkStore::gkStore()-- failed to open '%s' for reading: %s\n",
              name, strerror(errno));
      exit(1);
    }

    AS_UTL_safeRead(F, &_numberOfPartitions, "gkStore::_numberOfPartitions", sizeof(uint32), 1);

    _partitionID            = partID;
    _readsPerPartition      = new uint32 [_numberOfPartitions   + 1];  //  No zeroth element in any of these
    _readIDtoPartitionID    = new uint32 [gkStore_getNumReads() + 1];
    _readIDtoPartitionIdx   = new uint32 [gkStore_getNumReads() + 1];

    AS_UTL_safeRead(F, _readsPerPartition,    "gkStore::_readsPerPartition",    sizeof(uint32), _numberOfPartitions   + 1);
    AS_UTL_safeRead(F, _readIDtoPartitionID,  "gkStore::_readIDtoPartitionID",  sizeof(uint32), gkStore_getNumReads() + 1);
    AS_UTL_safeRead(F, _readIDtoPartitionIdx, "gkStore::_readIDtoPartitionIdx", sizeof(uint32), gkStore_getNumReads() + 1);

    fclose(F);

    snprintf(name, sizeof(name), "%s/libraries", _storePath);
    _librariesMMap = new memoryMappedFile (name, memoryMappedFile_readOnly);
    _libraries     = (gkLibrary *)_librariesMMap->get(0);
    //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _libraries);

    snprintf(name, sizeof(name), "%s/partitions/reads.%04" F_U32P "", _storePath, partID);
    _readsMMap     = new memoryMappedFile (name, memoryMappedFile_readOnly);
    _reads         = (gkRead *)_readsMMap->get(0);
    //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _reads);

    snprintf(name, sizeof(name), "%s/partitions/blobs.%04" F_U32P "", _storePath, partID);
    _blobsMMap     = new memoryMappedFile (name, memoryMappedFile_readOnly);
    _blobs         = (void *)_blobsMMap->get(0);
    //fprintf(stderr, " -- openend '%s' at "F_X64"\n", name, _blobs);
  }

  //  Info only, no access to reads or libraries.

  else if (mode == gkStore_infoOnly) {
    //fprintf(stderr, "gkStore()--  opening '%s' for info-only access.\n", _storePath);
  }

  else {
    fprintf(stderr, "gkStore::gkStore()-- invalid mode '%s' with partition ID %u.\n",
            toString(mode), partID);
    assert(0);
  }
}