Beispiel #1
0
void
partitionBySize(char *prefix, uint64 partitionSize, char *filename) {
  seqCache     *F = new seqCache(filename);
  uint32        n = F->getNumberOfSequences();
  partition_s  *p = loadPartition(F);

  uint32  openP = 1;  //  Currently open partition
  uint32  sizeP = 0;  //  Size of open partition
  uint32  seqsP = n;  //  Number of sequences to partition

  //  For any sequences larger than partitionSize, create
  //  partitions containing just one sequence
  //
  for (uint32 i=0; i<n; i++) {
    if (p[i].length > partitionSize) {
      p[i].partition = openP++;
      seqsP--;
    }
  }

  //  For the remaining, iterate through the list,
  //  greedily placing the longest sequence that fits
  //  into the open partition
  //
  while (seqsP > 0) {
    for (uint32 i=0; i<n; i++) {
      if ((p[i].partition == 0) &&
          (p[i].length + sizeP < partitionSize)) {
        p[i].partition = openP;
        sizeP += p[i].length;
        seqsP--;
      }
    }

    openP++;
    sizeP = 0;
  }

  outputPartition(F, prefix, p, openP-1, n);

  delete [] p;
  delete    F;
}
Beispiel #2
0
void
partitionByBucket(char *prefix, uint64 partitionSize, char *filename) {
  seqCache     *F = new seqCache(filename);
  uint32        n = F->getNumberOfSequences();
  partition_s  *p = loadPartition(F);

  if (partitionSize > n)
    partitionSize = n;

  //  The size, in bases, of each partition
  //
  uint32       *s = new uint32 [partitionSize];
  for (uint32 i=0; i<partitionSize; i++)
    s[i] = 0;

  //  For each sequence
  //
  for (uint32 nextS=0; nextS<n; nextS++) {

    //  find the smallest partition
    //
    uint32 openP = 0;
    for (uint32 i=0; i<partitionSize; i++)
      if (s[i] < s[openP])
        openP = i;

    //  add the next largest sequence to the open partition
    //
    s[openP] += p[nextS].length;
    p[nextS].partition = openP+1;
  }

  outputPartition(F, prefix, p, (uint32)partitionSize, n);

  delete [] p;
  delete    F;
}
void gbLoadRna(char* reloadList)
/* Sync the database with the state in the genbank repository.
 *
 * reloadList - when non-NULL, forces DBLOAD_BYPASS_GBLOADED on and, unless
 *     DBLOAD_DRY_RUN is set, is handed to gbReloadDelete() before loading.
 *
 * Works on file-level state: reads gDatabase, gWorkDir, gReload,
 * gForceIgnoreDelete and gMaxShrinkageError, modifies gOptions.flags, and
 * creates and frees gLoadedTbl.  Calls errAbort() when -reload is combined
 * with a dry run, when DBLOAD_INITIAL is set but dbLoadPartitionsGet()
 * returns NULL, and at the very end when gMaxShrinkageError is set. */
{
/* NOTE(review): index is not released anywhere in this function; confirm
 * whether that is intentional. */
struct gbIndex* index = gbIndexNew(gDatabase, NULL);
struct gbSelect* selectList, *select;
struct sqlConnection* conn;

/* must go through all tables if any reload is selected,
 * extFile update is requested, or rebuilding derived */
/* NOTE(review): the condition below only tests reloadList and gReload; the
 * extFile-update and rebuild-derived cases named above are not checked
 * here -- confirm whether they are handled elsewhere. */
if ((reloadList != NULL) || gReload)
    gOptions.flags |= DBLOAD_BYPASS_GBLOADED;

/* checked before any connection is opened or lock is taken */
if (gReload && (gOptions.flags & DBLOAD_DRY_RUN))
    errAbort("can't specify both -reload and -dryRun");

gbVerbEnter(1, "gbLoadRna");
conn = hAllocConn(gDatabase);
gbLockDb(conn, NULL);

if (gOptions.flags & DBLOAD_INITIAL)
    checkInitialLoad(conn);

/* delete anything on the reload list up front */
if (((gOptions.flags & DBLOAD_DRY_RUN) == 0) && (reloadList != NULL))
    {
    gbAlignDataInit(gWorkDir, &gOptions, conn);
    gbReloadDelete(gDatabase, reloadList, &gOptions, gWorkDir);
    }

/* get the list of partitions to load; an initial load with nothing to
 * load is treated as an error */
selectList = dbLoadPartitionsGet(&gOptions, index);
if ((gOptions.flags & DBLOAD_INITIAL) && (selectList == NULL))
    errAbort("-initialLoad specified and no sequences were found to load");

/* clean up any ignored entries before setting anything up */
gbVerbEnter(3, "delete ignored");
gbIgnoredDelete(gDatabase, selectList, gForceIgnoreDelete, &gOptions, gWorkDir);
gbVerbLeave(3, "delete ignored");

/* loaded table to track updates that have been processed */
gLoadedTbl = gbLoadedTblNew(conn);

/* load each partition */
for (select = selectList; select != NULL; select = select->next)
    loadPartition(select, conn);

/* If we are delaying table load, now is the time */
if ((gOptions.flags & DBLOAD_INITIAL)
    && ((gOptions.flags & DBLOAD_DRY_RUN) == 0))
    loadDelayedTables();

/* clean up extFile table if we change references for any seq */
if ((gOptions.flags & DBLOAD_EXT_FILE_UPDATE) && ((gOptions.flags & DBLOAD_DRY_RUN) == 0))
    cleanExtFileTable();

/* clean up */
slFreeList(&selectList);
gbMetaDataFree();
gbLoadedTblFree(&gLoadedTbl);
gbUnlockDb(conn, NULL);
hFreeConn(&conn);

/* must go to stderr to be logged */
gbVerbLeave(1, "gbLoadRna");
/* The shrinkage error is reported only after all cleanup above has run.
 * NOTE(review): "Stoping" in the message is a typo for "Stopping"; it is
 * runtime text and is left unchanged here. */
if (gMaxShrinkageError)
    errAbort("Stoping due to maxShrinkage limit being exceeded in one or more\n"
             "partitions. Investigate and rerun with -allowLargeDeletes.");
}