示例#1
0
int main(int argc, char *argv[])
/* Check parameters, set up, loop through each GenBank file. */
{
char *gbName;
int argi = 1;
struct hash *estAuthorHash = NULL;
char *pepFa;

optionInit(&argc, argv, optionSpecs);
if (argc < 4)
    usage();

gByAccPrefixSize = optionInt("byAccPrefix", 0);
gbIdxName = optionVal("gbidx", NULL);
pepFa = optionVal("pepFa", NULL);
gbType = gbParseType(optionVal("type", "mrna,est"));
gbOrg  = optionVal("org", NULL);
inclXMs = optionExists("inclXMs");

if (gByAccPrefixSize > 4)  /* keep small to avoid tons of open files */
    errAbort("max value of -byAccPrefix is 4");

gCurAccPrefix[0] = '\0';

faName = argv[argi++];
raName = argv[argi++];

estAuthorHash = newHash(23);
kvt = newKvt(5*1024);
gbfInit();

if (pepFa != NULL)
    gPepFa = gbFaOpen(pepFa,"w");

char *blackList = optionVal("blackList", NULL);
if (blackList != NULL)
    blackListRanges = genbankBlackListParse(blackList);

while (argi < argc)
    {
    gbName = argv[argi++];
    printf("Processing %s into %s and %s\n", gbName, faName, raName);
    procOneGbFile(gbName, estAuthorHash);
    }

gbFaClose(&faFile);
gbFaClose(&gPepFa);
carefulClose(&raFile);
carefulClose(&gbIdxFile);

return 0;
}
示例#2
0
static void getFastaOffsets(struct brokenRefPepTbl *brpTbl,
                            struct sqlConnection *conn,
                            struct extFileTbl* extFileTbl,
                            char *faPath)
/* Scan the fasta file at faPath.  For every record whose accession is in
 * brpTbl->protAccHash with a matching version and the same fasta path, store
 * the file id, record offset, sequence size and record size in that entry. */
{
struct gbFa *faIn = gbFaOpen(faPath, "r");
char accBuf[GB_ACC_BUFSZ];
HGID faExtId = extFileTblGet(extFileTbl, conn, faPath);

gbVerbMsg(5, "scanning fasta: %s", faPath);
for (;;)
    {
    struct brokenRefPep *entry;
    short recVer;

    if (!gbFaReadNext(faIn))
        break;
    gbVerbMsg(5, "   %s: %lld", faIn->id, (long long)faIn->recOff);

    /* save only if same accession, version, and file (to match mrna fa) */
    recVer = gbSplitAccVer(faIn->id, accBuf);
    entry = hashFindVal(brpTbl->protAccHash, accBuf);
    if (entry == NULL)
        continue;
    if (recVer != entry->protVer)
        continue;
    if (!sameString(faPath, entry->newFaPath))
        continue;

    /* force read of sequence data before seqLen and off are used below */
    gbFaGetSeq(faIn);
    entry->newFaId = faExtId;
    entry->newFaOff = faIn->recOff;
    entry->newSeqSize = faIn->seqLen;
    entry->newRecSize = faIn->off-faIn->recOff;
    gbVerbMsg(5, "      save: %s %lld for %lld\n", faIn->id, (long long)faIn->recOff, (long long)faIn->off);
    }
gbFaClose(&faIn);
}
示例#3
0
static void openByAccPrefix(char* accPrefix)
/* Switch the output files over to the given accession prefix: close whatever
 * is currently open, then open the prefixed ra and fasta files (and the
 * prefixed index file when gbIdxName is set).  The open mode is "w" when
 * isFirstOpen() is true for the ra path and "a" otherwise; all files share
 * that mode.  gCurAccPrefix is updated to accPrefix at the end. */
{
char raPath[PATH_LEN], faPath[PATH_LEN], gbIdxPath[PATH_LEN];
char *openMode;

/* Release the handles from the previous prefix, if any. */
carefulClose(&raFile);
gbFaClose(&faFile);
if (gbIdxName != NULL)
    carefulClose(&gbIdxFile);

/* The ra path decides the mode used for every file of this prefix. */
makeAccPrefixedFile(accPrefix, raName, raPath);
if (isFirstOpen(raPath))
    openMode = "w";
else
    openMode = "a";
raFile = mustOpen(raPath, openMode);

makeAccPrefixedFile(accPrefix, faName, faPath);
faFile = gbFaOpen(faPath, openMode);

if (gbIdxName != NULL)
    {
    makeAccPrefixedFile(accPrefix, gbIdxName, gbIdxPath);
    gbIdxFile = mustOpen(gbIdxPath, openMode);
    }

/* Remember which prefix the open files belong to. */
strcpy(gCurAccPrefix, accPrefix);
}
void copySelectedFasta(struct gbSelect* select)
/* Copy the FASTA records that were selected for alignment, segregating them
 * by native/xeno and partitioning large files.  An output is created only
 * for the organism categories set in select->orgCats. */
{
char srcPath[PATH_LEN];
struct gbFa* srcFa;
struct outFa* nativeOut;
struct outFa* xenoOut;

nativeOut = (select->orgCats & GB_NATIVE) ? outFaNew(select, GB_NATIVE) : NULL;
xenoOut = (select->orgCats & GB_XENO) ? outFaNew(select, GB_XENO) : NULL;

gbProcessedGetPath(select, "fa", srcPath);
gbVerbEnter(2, "copying from %s", srcPath);
srcFa = gbFaOpen(srcPath, "r");

/* Copy one record per call until copyFastaRec reports there is no more. */
for (;;)
    {
    if (!copyFastaRec(select, srcFa, nativeOut, xenoOut))
        break;
    }

outFaFree(&nativeOut);
outFaFree(&xenoOut);
gbFaClose(&srcFa);
gbVerbLeave(2, "copying from %s", srcPath);
}
示例#5
0
void seqDataProcessUpdate(struct gbSelect* select)
/* Get sequences for a partition and update.  The partition's processed index
 * should already be loaded, with the selected versions flagged.  Every
 * record of the partition's "fa" file is passed to processSeq. */
{
char faPath[PATH_LEN];
struct gbFa* fa;

gbProcessedGetPath(select, "fa", faPath);
fa = gbFaOpen(faPath, "r");
for (;;)
    {
    if (!gbFaReadNext(fa))
        break;
    processSeq(select, fa);
    }
gbFaClose(&fa);
}
void outFaClose(struct outFa* outFa)
/* If the fasta output is open, print an "alignFa:" line (file name, organism
 * category name, sequence count, base count) to stdout, then close the fasta
 * file and the polyA file handle.  The outFa object itself is not freed. */
{
if (outFa->fa == NULL)
    return;  /* nothing open, so nothing to report or close */

printf("alignFa: %s %s %d %lld\n", outFa->fa->fileName,
       gbOrgCatName(outFa->select.orgCats), outFa->numSeqs,
       outFa->numBases);
gbFaClose(&outFa->fa);
carefulClose(&outFa->polyAFh);
}
示例#7
0
void seqDataClose(void)
/* Close the output file held in gOutFa.  Takes no arguments; the explicit
 * (void) gives the definition a real prototype so that calls passing
 * arguments are rejected by the compiler. */
{
gbFaClose(&gOutFa);
}