void partitionBySize(char *prefix, uint64 partitionSize, char *filename) { seqCache *F = new seqCache(filename); uint32 n = F->getNumberOfSequences(); partition_s *p = loadPartition(F); uint32 openP = 1; // Currently open partition uint32 sizeP = 0; // Size of open partition uint32 seqsP = n; // Number of sequences to partition // For any sequences larger than partitionSize, create // partitions containing just one sequence // for (uint32 i=0; i<n; i++) { if (p[i].length > partitionSize) { p[i].partition = openP++; seqsP--; } } // For the remaining, iterate through the list, // greedily placing the longest sequence that fits // into the open partition // while (seqsP > 0) { for (uint32 i=0; i<n; i++) { if ((p[i].partition == 0) && (p[i].length + sizeP < partitionSize)) { p[i].partition = openP; sizeP += p[i].length; seqsP--; } } openP++; sizeP = 0; } outputPartition(F, prefix, p, openP-1, n); delete [] p; delete F; }
void partitionByBucket(char *prefix, uint64 partitionSize, char *filename) { seqCache *F = new seqCache(filename); uint32 n = F->getNumberOfSequences(); partition_s *p = loadPartition(F); if (partitionSize > n) partitionSize = n; // The size, in bases, of each partition // uint32 *s = new uint32 [partitionSize]; for (uint32 i=0; i<partitionSize; i++) s[i] = 0; // For each sequence // for (uint32 nextS=0; nextS<n; nextS++) { // find the smallest partition // uint32 openP = 0; for (uint32 i=0; i<partitionSize; i++) if (s[i] < s[openP]) openP = i; // add the next largest sequence to the open partition // s[openP] += p[nextS].length; p[nextS].partition = openP+1; } outputPartition(F, prefix, p, (uint32)partitionSize, n); delete [] p; delete F; }
void gbLoadRna(char* reloadList) /* Sync the database with the state in the genbank respository. */ { struct gbIndex* index = gbIndexNew(gDatabase, NULL); struct gbSelect* selectList, *select; struct sqlConnection* conn; /* must go through all tables if any reload is selected, * extFile update is requested, or rebuilding derived */ if ((reloadList != NULL) || gReload) gOptions.flags |= DBLOAD_BYPASS_GBLOADED; if (gReload && (gOptions.flags & DBLOAD_DRY_RUN)) errAbort("can't specify both -reload and -dryRun"); gbVerbEnter(1, "gbLoadRna"); conn = hAllocConn(gDatabase); gbLockDb(conn, NULL); if (gOptions.flags & DBLOAD_INITIAL) checkInitialLoad(conn); /* delete anything on the reload list up front */ if (((gOptions.flags & DBLOAD_DRY_RUN) == 0) && (reloadList != NULL)) { gbAlignDataInit(gWorkDir, &gOptions, conn); gbReloadDelete(gDatabase, reloadList, &gOptions, gWorkDir); } selectList = dbLoadPartitionsGet(&gOptions, index); if ((gOptions.flags & DBLOAD_INITIAL) && (selectList == NULL)) errAbort("-initialLoad specified and no sequences were found to load"); /* clean up any ignored entries before setting anything up */ gbVerbEnter(3, "delete ignored"); gbIgnoredDelete(gDatabase, selectList, gForceIgnoreDelete, &gOptions, gWorkDir); gbVerbLeave(3, "delete ignored"); /* loaded table to track updates that have been processed */ gLoadedTbl = gbLoadedTblNew(conn); /* load each partition */ for (select = selectList; select != NULL; select = select->next) loadPartition(select, conn); /* If we are delaying table load, now is the time */ if ((gOptions.flags & DBLOAD_INITIAL) && ((gOptions.flags & DBLOAD_DRY_RUN) == 0)) loadDelayedTables(); /* clean up extFile table if we change references for any seq */ if ((gOptions.flags & DBLOAD_EXT_FILE_UPDATE) && ((gOptions.flags & DBLOAD_DRY_RUN) == 0)) cleanExtFileTable(); /* clean up */ slFreeList(&selectList); gbMetaDataFree(); gbLoadedTblFree(&gLoadedTbl); gbUnlockDb(conn, NULL); hFreeConn(&conn); /* must go to stderr to be logged */ gbVerbLeave(1, "gbLoadRna"); if (gMaxShrinkageError) errAbort("Stoping due to maxShrinkage limit being exceeded in one or more\n" "partitions. Investigate and rerun with -allowLargeDeletes."); }