Esempio n. 1
0
void udpCountServer(char *portName)
/* countServer - A server that just returns a steadily increasing stream of numbers.
 * Binds a UDP socket to portName on all interfaces and answers every
 * full-sized countMessage with the elapsed time since start-up, the
 * client's echoed time stamp, a running count and the client's message
 * value plus 256.  The loop ends after answering a request whose message
 * field is zero. */
{
int port = atoi(portName);
int ear;
int count = 0;
struct timeval startTime, tv;
struct countMessage sendMessage, receiveMessage;
struct sockaddr_in sai;

ZeroVar(&sai);
sai.sin_family = AF_INET;
sai.sin_port = htons(port);
sai.sin_addr.s_addr = INADDR_ANY;
ear = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (ear < 0)
    errAbort("Couldn't create socket: %s", strerror(errno));
if (bind(ear, (struct sockaddr *)&sai, sizeof(sai)) < 0)
    errAbort("Couldn't bind ear");
gettimeofday(&startTime, NULL);
for (;;)
    {
    int err;
    /* recvfrom() takes a socklen_t in/out length, reset for every datagram. */
    socklen_t saiSize = sizeof(sai);
    ZeroVar(&sai);
    sai.sin_family = AF_INET;
    err = recvfrom(ear, &receiveMessage, sizeof(receiveMessage),
        0, (struct sockaddr *)&sai, &saiSize);
    if (err < 0)
        {
        warn("couldn't receive %s", strerror(errno));
        continue;
        }
    if (err != sizeof(receiveMessage))
        {
        warn("Message truncated");
        continue;
        }
    gettimeofday(&tv, NULL);
    sendMessage.time = timeDiff(&startTime, &tv);
    sendMessage.echoTime = receiveMessage.time;
    sendMessage.count = ++count;
    sendMessage.message = receiveMessage.message + 256;
    /* Reply to the address recvfrom() filled in, using the length it reported. */
    if (sendto(ear, &sendMessage, sizeof(sendMessage), 0,
               (struct sockaddr *)&sai, saiSize) < 0)
        warn("couldn't send %s", strerror(errno));
    /* A zero message from the client is the shutdown request. */
    if (!receiveMessage.message)
         break;
    }
close(ear);
printf("All done after %d\n", count);
}
Esempio n. 2
0
struct gbAlignInfo gbAlignFindNeedAligned(struct gbSelect* select,
                                          struct gbSelect* prevSelect)
/* Find entries that need to be aligned or migrated for an update.
 * If prevSelect is not null, and select indicates the full update,
 * alignments will be flagged for migration if possible.  Only the
 * update in select is processed, however alignments for any
 * of the prevSelect updates can be flagged for migration.
 *
 * If an entry is selected for migration:
 *   prevEntry.clientFlags |= MIGRATE_FLAG
 *   entry.clientFlags |= MIGRATE_FLAG
 *   update.selectAligns |= GB_NATIVE or GB_XENO
 *
 * If an entry is selected for alignment:
 *   entry.clientFlags |= MIGRATE_FLAG
 *   update.selectProc |= GB_NATIVE or GB_XENO
 * Returns counts of entries to align or migrate. */
{
struct gbAlignInfo alignInfo;
struct gbProcessed* processed;
ZeroVar(&alignInfo);
        
/* visit all processed entries for this update  */
for (processed = select->update->processed; processed != NULL;
     processed = processed->updateLink)
    {
    /* this will always select entries if this is the full update */
    if (needAlignedSelect(select, processed->entry))
        flagNeedAligned(select, prevSelect, processed, &alignInfo);
    }
return alignInfo;
}
Esempio n. 3
0
void stsMapFromStsMarker(struct stsMarker *oldEl, struct stsMap *el)
/* Fill in el (stsMap format) from oldEl (older stsMarker format).  Pointer
 * fields are copied as-is, so el shares strings with oldEl.  Fields that
 * have no counterpart in the old format get fixed placeholder values. */
{
struct stsMarker *src = oldEl;
struct stsMap *dst = el;

ZeroVar(dst);

/* Position, name and identifiers. */
dst->chrom = src->chrom;
dst->chromStart = src->chromStart;
dst->chromEnd = src->chromEnd;
dst->name = src->name;
dst->score = src->score;
dst->identNo = src->identNo;
dst->ctgAcc = src->ctgAcc;
dst->otherAcc = src->otherAcc;

/* Map placements carried over from the old record. */
dst->genethonChrom = src->genethonChrom;
dst->genethonPos = src->genethonPos;
dst->marshfieldChrom = src->marshfieldChrom;
dst->marshfieldPos = src->marshfieldPos;
dst->gm99Gb4Chrom = src->gm99Gb4Chrom;
dst->gm99Gb4Pos = src->gm99Gb4Pos;
dst->shgcG3Chrom = src->shgcG3Chrom;
dst->shgcG3Pos = src->shgcG3Pos;
dst->wiYacChrom = src->wiYacChrom;
dst->wiYacPos = src->wiYacPos;
dst->shgcTngChrom = src->shgcTngChrom;
dst->shgcTngPos = src->shgcTngPos;

/* FISH information. */
dst->fishChrom = src->fishChrom;
dst->beginBand = src->beginBand;
dst->endBand = src->endBand;

/* Fields not copied from oldEl: fixed placeholders. */
dst->wiRhChrom = "0";
dst->wiRhPos = 0;
dst->decodeChrom = "0";
dst->decodePos = 0;
dst->lab = "-";
}
Esempio n. 4
0
static time_t gbParseHumanTimeStamp(char *col, boolean *isOkRet)
/* Parse a time stamp, in  "2004-11-01 01:06:18" format, as returned
 * by mysql timestamp columns 4.1 or later.
 * *isOkRet is set to FALSE if the string is not exactly 19 characters
 * long, a separator is wrong, a numeric field fails to parse or mktime()
 * cannot represent the time; the returned value is only meaningful when
 * *isOkRet is TRUE.  The fields are interpreted by mktime() as local time
 * with tm_isdst left at zero. */
{
boolean isOk = TRUE;
struct tm tm;
time_t numTime;
ZeroVar(&tm);

/* Everything below indexes fixed offsets up to col[18], so a string of
 * the wrong length must be rejected before any field is touched. */
if (strlen(col) != 19)
    {
    *isOkRet = FALSE;
    return (time_t)-1;
    }
tm.tm_year = parseUnsigned(col, 0, 4, &isOk)-1900;
if (col[4] != '-')
    isOk = FALSE;
/* struct tm counts months from 0 (January), the text counts from 1. */
tm.tm_mon = parseUnsigned(col, 5, 2, &isOk)-1;
if (col[7] != '-')
    isOk = FALSE;
tm.tm_mday = parseUnsigned(col, 8, 2, &isOk);
if (col[10] != ' ')
    isOk = FALSE;
tm.tm_hour = parseUnsigned(col, 11, 2, &isOk);
if (col[13] != ':')
    isOk = FALSE;
tm.tm_min = parseUnsigned(col, 14, 2, &isOk);
if (col[16] != ':')
    isOk = FALSE;
tm.tm_sec = parseUnsigned(col, 17, 2, &isOk);
/* convert */
if ((numTime = mktime(&tm)) == -1)
    isOk = FALSE;

*isOkRet = isOk;
return numTime;
}
void wigEncode(char *bedFile, char *wigFile, char *wibFile)
/* Convert bedFile to the wiggle binary representation (wigFile plus
 * wibFile).  An options structure is only passed to wigAsciiToBinary when
 * at least one of the lift / overlap / size-limit settings is active.
 * Aborts if the data limits come back unchanged from their starting
 * values, which is taken to mean the input was empty. */
{
double lower = wigEncodeStartingLowerLimit;
double upper = wigEncodeStartingUpperLimit;
int wantOptions = (lift != 0) || noOverlap || noOverlapSpanData
                  || (wibSizeLimit > 0);

if (!wantOptions)
    {
    wigAsciiToBinary(bedFile, wigFile, wibFile, &upper, &lower, NULL);
    }
else
    {
    struct wigEncodeOptions options;

    ZeroVar(&options);  /* start from an all-zero option set */
    options.lift = lift;
    options.noOverlap = noOverlap;
    options.flagOverlapSpanData = noOverlapSpanData;
    options.wibSizeLimit = wibSizeLimit;
    wigAsciiToBinary(bedFile, wigFile, wibFile, &upper, &lower, &options);
    /* options.wibSizeLimit is read back after the call and compared
     * against the requested limit */
    if ((wibSizeLimit > 0) && (options.wibSizeLimit >= wibSizeLimit))
        verbose(1,"#\twarning, reached wiggle size limits, %lld vs. %lld\n",
                wibSizeLimit, options.wibSizeLimit);
    }

if ((upper == wigEncodeStartingUpperLimit) &&
    (lower == wigEncodeStartingLowerLimit))
    errAbort("ERROR: wigEncode: empty input file: '%s'", bedFile );

verbose(1, "Converted %s, upper limit %.2f, lower limit %.2f\n",
                        bedFile, upper, lower);
}
static struct featBounds getFeatures(struct genePred *gp, int iExon)
/* get the bounds of the features within an exon */
{
int start = gp->exonStarts[iExon];
int end = gp->exonEnds[iExon];
struct featBounds fb;
ZeroVar(&fb);

if (start < gp->cdsStart)
    {
    /* has initial UTR */
    struct range *utr = (gp->strand[0] == '+') ? &fb.utr5 : &fb.utr3;
    utr->start = start;
    utr->end = (end < gp->cdsStart) ? end : gp->cdsStart;
    start = utr->end;
    }

if ((gp->cdsStart < end) && (gp->cdsEnd > start))
    {
    /* has CDS */
    fb.cds.start = start;
    fb.cds.end = (end < gp->cdsEnd) ? end : gp->cdsEnd;
    start = fb.cds.end;
    }

if (start >= gp->cdsEnd)
    {
    /* has terminal UTR */
    struct range *utr = (gp->strand[0] == '+') ? &fb.utr3 : &fb.utr5;
    utr->start = start;
    utr->end = end;
    }
return fb;
}
Esempio n. 7
0
struct frag *readFragList(char *fileName)
/* Read list of frags from the fasta file fileName.  Each record name must
 * end in _<number>; that number times 1000 becomes the fragment start and
 * every fragment is 1000 bases long on the hard-coded chromosome chr14.
 * Returns the fragments in file order. */
{
struct frag *list = NULL, *frag;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct dnaSeq seq;
char *s;
int fragIx;
ZeroVar(&seq);

printf("Reading %s\n", fileName);
while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    AllocVar(frag);
    frag->name = cloneString(seq.name);
    s = strrchr(seq.name, '_');
    /* cast: passing a negative plain char to isdigit() is undefined */
    if (s == NULL || !isdigit((unsigned char)s[1]))
        errAbort("Expecting _ and number in %s", seq.name);
    fragIx = atoi(s+1);
    frag->chrom = "chr14";
    frag->start = fragIx*1000;
    frag->end = frag->start + 1000;
    slAddHead(&list, frag);
    }
lineFileClose(&lf);
printf("Read %d fragments from %s\n", slCount(list), fileName);
slReverse(&list);
return list;
}
void splitByRecord(char *inName, int splitCount, char *outRoot, off_t estSize)
/* Split into a file base by base. */
{
struct dnaSeq seq;
struct lineFile *lf = lineFileOpen(inName, TRUE);
int digits = digitsBaseTen(splitCount);
off_t nextEnd = 0;
off_t curPos = 0;
int fileCount = 0;
FILE *f = NULL;
char outPath[PATH_LEN];
ZeroVar(&seq);

while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    curPos += seq.size;
    if (curPos > nextEnd)
        {
	carefulClose(&f);
        mkOutPath(outPath, outRoot, digits, fileCount++);
	verbose(2, "writing %s\n", outPath);
	f = mustOpen(outPath, "w");
	nextEnd = calcNextEnd(fileCount, splitCount, estSize);
	}
    faWriteNext(f, seq.name, seq.dna, seq.size);
    }
carefulClose(&f);
lineFileClose(&lf);
}
Esempio n. 9
0
void samToOpenBed(char *samIn, FILE *f)
/* Convert the alignments in SAM file samIn to BED lines (chrom, start,
 * end, ".", 0, strand) written to the already open file f.  Reads without
 * a target are skipped.  The end position is approximated as start plus
 * the query length; the cigar is not parsed. */
{
    samfile_t *sf = samopen(samIn, "r", NULL);
    /* the header is dereferenced right below, so fail cleanly on a bad open */
    if (sf == NULL)
        errnoAbort("Couldn't open %s", samIn);
    bam_header_t *bamHeader = sf->header;
    /* NOTE(review): samread may allocate one.data, which is never released
     * here; confirm and free it if so. */
    bam1_t one;
    ZeroVar(&one);
    int err;
    while ((err = samread(sf, &one)) >= 0)
    {
        int32_t tid = one.core.tid;
        if (tid < 0)
            continue;
        char *chrom = bamHeader->target_name[tid];
        // Approximate here... can do better if parse cigar.
        int start = one.core.pos;
        int size = one.core.l_qseq;
        int end = start + size;
        boolean isRc = (one.core.flag & BAM_FREVERSE);
        char strand = '+';
        if (isRc)
        {
            strand = '-';
            reverseIntRange(&start, &end, bamHeader->target_len[tid]);
        }
        fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    }
    /* -1 is treated as the normal end of input; anything else is an error */
    if (err < 0 && err != -1)
        errnoAbort("samread err %d", err);
    samclose(sf);
}
Esempio n. 10
0
void migrateAligned(struct gbSelect* select, struct gbSelect* prevSelect,
                    struct gbAlignInfo* alignInfo, struct outputFiles* out,
                    struct recCounts* recCounts)
/* Migrate existing aligned PSLs from an earlier release.  Every update of
 * the previous release is visited by temporarily pointing
 * prevSelect->update at it; the original value is restored afterwards.
 * The migrated counts are added to recCounts, and the function aborts if
 * the PSL count for the selected organism category differs from what
 * alignInfo expected. */
{
int orgCatIdx = gbOrgCatIdx(select->orgCats);
struct gbUpdate* savedUpdate = prevSelect->update;
struct gbUpdate* upd;
struct migrateAligns migrate;

ZeroVar(&migrate);
migrate.prevSelect = prevSelect;
migrate.select = select;

gbVerbEnter(1, "migrating alignments");
upd = prevSelect->release->updates;
while (upd != NULL)
    {
    prevSelect->update = upd;
    migrateAlignedUpdate(prevSelect, &migrate, out, recCounts);
    upd = upd->next;
    }
prevSelect->update = savedUpdate;  /* undo the temporary re-pointing */

recCountsSum(recCounts, &migrate.counts);
if (alignInfo->migrate.recCnt[orgCatIdx] != migrate.counts.pslCnts.recCnt[orgCatIdx])
    {
    errAbort("expected to migrate %d %s PSLs, found %d",
             alignInfo->migrate.recCnt[orgCatIdx], gbOrgCatName(select->orgCats),
             migrate.counts.pslCnts.recCnt[orgCatIdx]);
    }
gbVerbLeave(1, "migrating alignments");
}
Esempio n. 11
0
void polyInfo(char *pslFile, char *genoFile, char *estFile, char *outputFile)
/* polyInfo - Collect info on polyAdenylation signals etc.
 * For every EST in estFile that has alignments in pslFile, fills in an
 * estOrientInfo per alignment against the matching genome sequence from
 * genoFile and writes it tab-separated to outputFile.  Aborts if an
 * alignment's target size disagrees with the loaded genome sequence. */
{
struct hash *pslHash = NULL;
struct hash *genoHash = loadGeno(genoFile);
static struct dnaSeq est;
struct lineFile *lf = NULL;
FILE *f = NULL;

pslHash = pslIntoHash(pslFile);
lf = lineFileOpen(estFile, TRUE);
f = mustOpen(outputFile, "w");

while (faSpeedReadNext(lf, &est.dna, &est.size, &est.name))
    {
    struct pslList *pl;
    struct psl *psl;
    struct estOrientInfo ei;
    if ((pl = hashFindVal(pslHash, est.name)) != NULL)
        {
        for (psl = pl->list; psl != NULL; psl = psl->next)
            {
            struct dnaSeq *geno = hashMustFindVal(genoHash, psl->tName);
            if (psl->tSize != geno->size)
                errAbort("psl generated on a different version of the genome");
            ZeroVar(&ei);
            fillInEstInfo(&ei, &est, geno, psl);
            estOrientInfoTabOut(&ei, f);
            }
        }
    }
/* Close both files explicitly instead of relying on process exit. */
lineFileClose(&lf);
carefulClose(&f);
}
Esempio n. 12
0
static void pslCDnaFilter(char *inPsl, char *outPsl)
/* filter cDNA alignments in psl format */
{
struct outFiles outFiles;
ZeroVar(&outFiles);
outFiles.passFh = mustOpen(outPsl, "w");
if (gDropped != NULL)
    outFiles.dropFh = mustOpen(gDropped, "w");
if (gWeirdOverlappped != NULL)
    outFiles.weirdOverFh = mustOpen(gWeirdOverlappped, "w");
if (gHapRefMapped != NULL)
    outFiles.hapRefMappedFh = mustOpen(gHapRefMapped, "w");
if (gHapRefCDnaAlns != NULL)
    outFiles.hapRefCDnaAlnsFh = mustOpen(gHapRefCDnaAlns, "w");
if (gHapLociAlns != NULL)
    outFiles.hapLociAlnsFh = mustOpen(gHapLociAlns, "w");
struct hapRegions *hapRegions = (gHapRegions == NULL) ? NULL
    : hapRegionsNew(gHapRegions, outFiles.hapRefMappedFh, outFiles.hapRefCDnaAlnsFh);
struct cDnaReader *reader = cDnaReaderNew(inPsl, gCDnaOpts, gPolyASizes, hapRegions);

while (cDnaReaderNext(reader))
    filterQuery(reader->cdna, hapRegions, &outFiles);

carefulClose(&outFiles.hapRefMappedFh);
carefulClose(&outFiles.hapRefCDnaAlnsFh);
carefulClose(&outFiles.hapLociAlnsFh);
carefulClose(&outFiles.dropFh);
carefulClose(&outFiles.weirdOverFh);
carefulClose(&outFiles.passFh);
cDnaStatsPrint(&reader->stats, 1);
hapRegionsFree(&hapRegions);
cDnaReaderFree(&reader);
}
void splitNcbiFa(char *ncbiIn, char *outDir)
/* splitNcbiFa - Split up NCBI format fa file into UCSC formatted ones.
 * Every record name must consist of 5 '|'-separated fields.  The fourth
 * field with its suffix chopped off names the output file
 * (outDir/<name>.fa); the fourth field plus "_1" becomes the sequence name
 * written to it.  Aborts on malformed names or names too long for the
 * fixed-size buffers. */
{
struct lineFile *lf = lineFileOpen(ncbiIn, TRUE);
static struct dnaSeq seq;
ZeroVar(&seq);

makeDir(outDir);
while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    char fileName[512];
    char *row[5];
    int wordCount;
    char ourName[129];
    char cloneName[128];
    int len;

    wordCount = chopByChar(seq.name, '|', row, ArraySize(row));
    if (wordCount != 5)
        errAbort("Expecting 5 | separated fields line %d of %s", lf->lineIx, lf->fileName);

    /* All three buffers are filled with bounded writes; truncation is an
     * error rather than a silent overflow or a mangled name. */
    len = snprintf(cloneName, sizeof(cloneName), "%s", row[3]);
    if (len < 0 || len >= (int)sizeof(cloneName))
        errAbort("Sequence id too long line %d of %s", lf->lineIx, lf->fileName);
    chopSuffix(cloneName);
    len = snprintf(fileName, sizeof(fileName), "%s/%s.fa", outDir, cloneName);
    if (len < 0 || len >= (int)sizeof(fileName))
        errAbort("Output path too long line %d of %s", lf->lineIx, lf->fileName);
    len = snprintf(ourName, sizeof(ourName), "%s_1", row[3]);
    if (len < 0 || len >= (int)sizeof(ourName))
        errAbort("Sequence id too long line %d of %s", lf->lineIx, lf->fileName);
    faWrite(fileName, ourName, seq.dna, seq.size);
    }
lineFileClose(&lf);
}
Esempio n. 14
0
static void addGffLineFromBed(struct bed *bed, char *source, char *feature,
			      int start, int end, char frame, char *txName)
/* Create a gffLine from a bed and line-specific parameters and print it
 * to stdout.  Strands other than '+' and '-' are written as '.'.  Beds
 * without a name get a generated gene id "gene<N>", numbered by a counter
 * shared across calls. */
{
struct gffLine gff;
/* Declared at function scope: gff.geneId may point at this buffer, which
 * therefore has to stay alive until gffTabOut has run. */
char namelessBuf[64];
char strand;
ZeroVar(&gff);
gff.seq = bed->chrom;
gff.source = source;
gff.feature = feature;
gff.start = start;
gff.end = end;
gff.score = bed->score;
strand = bed->strand[0];
if (strand != '+' && strand != '-')
    strand = '.';
gff.strand = strand;
gff.frame = frame;
gff.group = txName;
if (bed->name != NULL)
    gff.geneId = bed->name;
else
    {
    static int namelessIx = 0;
    safef(namelessBuf, sizeof(namelessBuf), "gene%d", ++namelessIx);
    gff.geneId = namelessBuf;
    }
gffTabOut(&gff, stdout);
}
void processOneGraph(struct txGraph *txg, struct hash *weightHash, double threshold,
                     char *outType, FILE *f)
/* Write out edges for one graph. */
{
    struct txEdge *edge;
    struct txEdgeBed e;
    ZeroVar(&e);
    e.chrom = txg->tName;
    for (edge = txg->edgeList; edge != NULL; edge = edge->next)
    {
        double weight = weightOfEvidence(txg, edge->evList, weightHash);
        if (weight >= threshold)
        {
            struct txVertex *start = &txg->vertices[edge->startIx];
            struct txVertex *end = &txg->vertices[edge->endIx];
            e.chromStart = start->position;
            e.chromEnd = end->position;
            e.name = outType;
            e.score = edge->evCount;
            e.strand[0] = txg->strand[0];
            e.startType[0] = ggVertexTypeAsString(start->type)[0];
            e.type = edge->type;
            e.endType[0] = ggVertexTypeAsString(end->type)[0];
            txEdgeBedTabOut(&e, f);
        }
    }
}
void splitAbout(char *inName, off_t approxSize, char *outRoot)
/* Split into chunks of about approxSize.  Don't break up
 * sequence though. */
{
struct dnaSeq seq;
struct lineFile *lf = lineFileOpen(inName, TRUE);
int digits = 2;
off_t curPos = approxSize;
int fileCount = 0;
FILE *f = NULL;
char outPath[PATH_LEN];
ZeroVar(&seq);

while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    if (curPos >= approxSize)
        {
	carefulClose(&f);
	curPos = 0;
        mkOutPath(outPath, outRoot, digits, fileCount++);
	verbose(2, "writing %s\n", outPath);
	f = mustOpen(outPath, "w");
	}
    curPos += seq.size;
    faWriteNext(f, seq.name, seq.dna, seq.size);
    }
carefulClose(&f);
lineFileClose(&lf);
}
Esempio n. 17
0
struct gbRelease* loadIndex(char* relName, unsigned types, char* database)
/* load processed section of index for release */
{
struct gbIndex* index = gbIndexNew(database, NULL);
struct gbSelect select;
ZeroVar(&select);
select.release = gbIndexMustFindRelease(index, relName);

if (types & GB_MRNA)
    {
    select.type = GB_MRNA;
    gbReleaseLoadProcessed(&select);
    }
if ((types & GB_EST) && (select.release->srcDb == GB_GENBANK))
    {
    struct slName* prefixes, *prefix;
    select.type = GB_EST;
    prefixes = gbReleaseGetAccPrefixes(select.release, GB_PROCESSED, GB_EST);
    for (prefix = prefixes; prefix != NULL; prefix = prefix->next)
        {
        select.accPrefix = prefix->name;
        gbReleaseLoadProcessed(&select);
        }
    select.accPrefix = NULL;
    slFreeList(&prefixes);
    }

return select.release;
}
void splitByNamePrefix(char *inName, char *outRoot, int preFixCount)
/* Split the fasta file inName into chunks using the first preFixCount
 * characters of each sequence name.  Each record is appended to
 * <outDir><prefix>.fa, where outDir is the directory part of outRoot.
 * Aborts if preFixCount is negative or too large for the prefix buffer,
 * or if the resulting path does not fit in the path buffer. */
{
struct dnaSeq seq;
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = NULL;
char outDir[256], outFile[128], ext[64], outPath[512], preFix[512];
ZeroVar(&seq);

splitPath(outRoot, outDir, outFile, ext);
/* Runtime check rather than assert(): the bound guards a buffer write and
 * must also hold in builds compiled with NDEBUG. */
if (preFixCount < 0 || preFixCount >= (int)sizeof(preFix))
    errAbort("prefix length %d out of range", preFixCount);

while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    int len;
    carefulClose(&f);
    strncpy(preFix, seq.name, preFixCount);
    preFix[preFixCount] = '\0';
    /* outDir plus prefix can exceed outPath, so format with a bound */
    len = snprintf(outPath, sizeof(outPath), "%s%s.fa", outDir, preFix);
    if (len < 0 || len >= (int)sizeof(outPath))
        errAbort("output path too long: %s%s.fa", outDir, preFix);
    verbose(2, "writing %s\n", outPath);
    /* append mode: several records may share one prefix */
    f = mustOpen(outPath, "a");
    faWriteNext(f, seq.name, seq.dna, seq.size);
    }
carefulClose(&f);
lineFileClose(&lf);
}
Esempio n. 19
0
void chainPair(struct seqPair *sp,
	struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct chain **pChainList,
	FILE *details)
/* Chain up blocks and output. */
{
struct chain *chainList, *chain, *next;
struct cBlock *b;
long startTime, dt;
int size = 0;
struct chainConnect cc;

verbose(1, "chainPair %s\n", sp->name);

/* Set up info for connect function. */
ZeroVar(&cc);
cc.query = qSeq;
cc.target = tSeq;
cc.ss = scoreScheme;
cc.gapCalc = gapCalc;

/* Score blocks. */
for (b = sp->blockList; b != NULL; b = b->next)
    {
    size = b->qEnd - b->qStart;
    checkBlockRange("query", qSeq, b->qStart, b->qEnd);
    checkBlockRange("target", tSeq, b->tStart, b->tEnd);
    b->score = axtScoreUngapped(scoreScheme, qSeq->dna + b->qStart, tSeq->dna + b->tStart, size);
    }


/* Get chain list and clean it up a little. */
startTime = clock1000();
chainList = chainBlocks(sp->qName, qSeq->size, sp->qStrand,
	sp->tName, tSeq->size, &sp->blockList,
	(ConnectCost)chainConnectCost, (GapCost)chainConnectGapCost,
	&cc, details);
dt = clock1000() - startTime;
verbose(1, "Main chaining step done in %ld milliseconds\n", dt);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    chainRemovePartialOverlaps(chain, qSeq, tSeq, scoreScheme->matrix);
    chainMergeAbutting(chain);
    chain->score = chainCalcScore(chain, scoreScheme, gapCalc,
    	qSeq, tSeq);
    }

/* Move chains scoring over threshold to master list. */
for (chain = chainList; chain != NULL; chain = next)
    {
    next = chain->next;
    if (chain->score >= minScore)
        {
	slAddHead(pChainList, chain);
	}
    else
        {
	chainFree(&chain);
	}
    }
}
int main(int argc, char* argv[])
/* Entry point: parse options and the four positional arguments (release,
 * update, type[.accPrefix], database), run gbAlignGet for that selection
 * and report the align/migrate counts on stderr and stdout. */
{
char *relName, *updateName, *typeAccPrefix, *database, *dot;
struct gbIndex* index;
struct gbSelect select;
struct gbSelect* prevSelect;
struct gbAlignInfo alignInfo;
boolean noMigrate;
ZeroVar(&select);

optionInit(&argc, argv, optionSpecs);
if (argc != 5)
    usage();
maxFaSize = optionInt("fasize", -1);
workDir = optionVal("workdir", "work/align");
noMigrate = optionExists("noMigrate");
createPolyASizes = optionExists("polyASizes");
gbVerbInit(optionInt("verbose", 0));
relName = argv[1];
updateName = argv[2];
typeAccPrefix = argv[3];
database = argv[4];

/* typeAccPrefix is "type" or "type.accPrefix".  The dot is overwritten
 * only while the type is parsed and then put back, because the full
 * string is printed in the messages below. */
dot = strchr(typeAccPrefix, '.');
if (dot == NULL)
    {
    select.type = gbParseType(typeAccPrefix);
    }
else
    {
    *dot = '\0';
    select.type = gbParseType(typeAccPrefix);
    *dot = '.';
    select.accPrefix = dot+1;
    }
select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno"));

index = gbIndexNew(database, NULL);
select.release = gbIndexMustFindRelease(index, relName);
select.update = gbReleaseMustFindUpdate(select.release, updateName);
gbVerbMsg(0, "gbAlignGet: %s/%s/%s/%s", select.release->name,
          select.release->genome->database, select.update->name,
          typeAccPrefix);

/* A release to migrate from is only looked up when migration is enabled. */
prevSelect = noMigrate ? NULL : gbAlignGetMigrateRel(&select);

alignInfo = gbAlignGet(&select, prevSelect);

/* the statistics line is printed regardless of verbosity */
fprintf(stderr, "gbAlignGet: %s/%s/%s/%s: align=%d, migrate=%d\n",
        select.release->name, select.release->genome->database,
        select.update->name, typeAccPrefix,
        alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt);
gbIndexFree(&index);

/* the driver program reads these counts from stdout */
printf("alignCnt: %d %d\n", alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt);
return 0;
}
Esempio n. 21
0
void faToTwoBit(char *inFiles[], int inFileCount, char *outFile)
/* Convert the fasta files in inFiles to a single 2 bit file outFile.
 * Empty sequences are skipped with a warning.  With stripVersion set,
 * names are cut at the first '.'.  Duplicate names abort unless
 * ignoreDups is set, in which case the later copies are dropped. */
{
struct twoBit *allSeqs = NULL, *cur;
struct hash *seenNames = newHash(18);
FILE *out;
int fileIx;

for (fileIx = 0; fileIx < inFileCount; ++fileIx)
    {
    struct lineFile *lf = lineFileOpen(inFiles[fileIx], TRUE);
    struct dnaSeq seq;
    ZeroVar(&seq);
    while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
        {
        if (seq.size == 0)
            {
            warn("Skipping item %s which has no sequence.\n",seq.name);
            continue;
            }

        /* drop a trailing version: everything from the first dot on */
        if (stripVersion)
            {
            char *dot = strchr(seq.name,'.');
            if (dot != NULL)
                *dot = '\0';
            }

        if (hashLookup(seenNames, seq.name))
            {
            if (ignoreDups)
                continue;
            errAbort("Duplicate sequence name %s", seq.name);
            }
        hashAdd(seenNames, seq.name, NULL);

        if (noMask)
            faToDna(seq.dna, seq.size);
        else
            unknownToN(seq.dna, seq.size);
        cur = twoBitFromDnaSeq(&seq, !noMask);
        slAddHead(&allSeqs, cur);
        }
    lineFileClose(&lf);
    }

/* slAddHead built the list back to front */
slReverse(&allSeqs);
out = mustOpen(outFile, "wb");
twoBitWriteHeader(allSeqs, out);
cur = allSeqs;
while (cur != NULL)
    {
    twoBitWriteOne(cur, out);
    cur = cur->next;
    }
carefulClose(&out);
}
void gapSplit(char *input, char *output)
/* gapSplit - split sequence on gaps of size N.
 * Copies every record of the fasta file input to output, wrapping at 60
 * bases per line.  A run of n/N of at least minGap bases that is followed
 * by more sequence is dropped and starts a new record named
 * <name>_<piece>; shorter runs are copied through unchanged.  A qualifying
 * run at the very end of a sequence is dropped without starting a new
 * piece. */
{
struct lineFile *lf = lineFileOpen(input,TRUE);
FILE *f = mustOpen(output, "w");
struct dnaSeq seq;
/* one name buffer reused for all records instead of one leaked per record */
struct dyString *seqName = dyStringNew(0);

ZeroVar(&seq);
while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    int pos = 0;
    int pieceCount = 0;
    int charLineCount = 0;

    dyStringClear(seqName);
    dyStringPrintf(seqName,"%s_%d", seq.name, pieceCount++);
    fprintf(f,">%s\n", seqName->string);
    while (pos < seq.size)
        {
        int gapSize = 0;
        int gapStart = 0;
        fprintf(f,"%c",seq.dna[pos]);
        ++charLineCount;
        if (!(charLineCount % 60)) { fprintf(f,"\n"); charLineCount = 0; }
        ++pos;
        gapStart = pos;	/*	remember where possible gap starts	*/
        /* Measure a gap starting here, if any.  The bound is tested before
         * the base is read, so seq.dna[seq.size] is never touched. */
        while (pos < seq.size &&
            ('n' == seq.dna[pos] || 'N' == seq.dna[pos]))
            {
            ++gapSize;
            ++pos;
            }
        /*	valid gap size to split here ?	*/
        if ((gapSize >= minGap) && (pos < seq.size))
            {
            if (charLineCount % 60) { fprintf(f,"\n"); charLineCount = 0; }
            dyStringClear(seqName);
            dyStringPrintf(seqName,"%s_%d", seq.name, pieceCount++);
            fprintf(f,">%s\n", seqName->string);
            }
        else if (gapSize < minGap)
            {
            /* gap too short to split on: copy it through */
            while (gapSize-- > 0)
                {
                fprintf(f,"%c",seq.dna[gapStart++]);
                ++charLineCount;
                if (!(charLineCount % 60)) {fprintf(f,"\n"); charLineCount = 0;}
                }
            }
        }
    if (charLineCount % 60) { fprintf(f,"\n"); }
    }
dyStringFree(&seqName);
carefulClose(&f);
lineFileClose(&lf);
}
Esempio n. 23
0
boolean bamIsSortedByTarget(char *fileName, int maxToCheck)
/* Return TRUE if bam is sorted by target for at least the first bits.
 * At most maxToCheck records are examined.  Sorted here means that
 * positions never decrease within a target and that a target never
 * reappears once a different one has been seen.  Records without a target
 * are ignored.  Aborts if the file cannot be opened. */
{
int leftToCheck = maxToCheck;
struct hash *targetHash = hashNew(0);
boolean result = TRUE;

/* Open bam/sam file and set up basic I/O vars on it. */
samfile_t *sf = samopen(fileName, "rb", NULL);
if (sf == NULL)
    errAbort("Couldn't open %s", fileName);
bam_header_t *bamHeader = sf->header;
/* NOTE(review): bam_read1 may allocate one.data, which is never released
 * here; confirm and free it if so. */
bam1_t one;
ZeroVar(&one);
int err;

char lastTarget[PATH_LEN] = "";
int lastPos = 0;

/* Loop through while still haven't hit our max and file still has data */
while ((err = bam_read1(sf->x.bam, &one)) >= 0)
    {
    if (--leftToCheck < 0)
        {
        break;
        }
    /* Get target,  skipping read if it's not aligned well enough to have a target. */
    int32_t tid = one.core.tid;
    if (tid < 0)
        continue;
    char *target = bamHeader->target_name[tid];

    int pos = one.core.pos;

    /* If we are on same target then make sure we are in ascending order. */
    if (sameString(target, lastTarget))
        {
        if (pos < lastPos)
            {
            result = FALSE;
            break;
            }
        }
    else
        {
        /* If sorted should not go back to a new chromosome. Use hash to check this */
        if (hashLookup(targetHash, target))
            {
            result = FALSE;
            break;
            }
        hashAdd(targetHash, target, NULL);
        safef(lastTarget, sizeof(lastTarget), "%s", target);
        }
    lastPos = pos;
    }
hashFree(&targetHash);
/* the file handle used to be left open on every path */
samclose(sf);
return result;
}
Esempio n. 24
0
struct oneStat *totalUtr(struct blatStats *stats)
/* Return sum of 5' and 3' UTRs. */
{
static struct oneStat acc;
ZeroVar(&acc);
addStat(&stats->utr5, &acc);
addStat(&stats->utr3, &acc);
return &acc;
}
Esempio n. 25
0
struct chromAnnMapIter chromAnnMapFirst(struct chromAnnMap *cam)
/* get iterator over a chromAnnMap */
{
struct chromAnnMapIter iter;
ZeroVar(&iter);
iter.cam = cam;
iter.chromCookie = hashFirst(cam->ranges->hash);
return iter;
}
Esempio n. 26
0
struct oneStat *totalSplice(struct blatStats *stats)
/* Return sum of 5' and 3' splice sites. */
{
static struct oneStat acc;
ZeroVar(&acc);
addStat(&stats->splice5, &acc);
addStat(&stats->splice3, &acc);
return &acc;
}
Esempio n. 27
0
struct genbankCds getCds(struct sqlConnection *conn, struct psl *psl)
/* Determine the CDS for the query of psl.
 *  - gNoCds: start and end are set to -1 and both ends marked incomplete.
 *  - gAllCds: the CDS covers the aligned part of the query (reversed for
 *    '-' strand alignments) and both ends are marked complete.
 *  - otherwise the CDS string is fetched through getCdsForAcc and parsed.
 *    A missing CDS, or one longer than the query, yields start and end of
 *    -1; after a parse failure the result is returned as genbankCdsParse
 *    left it.
 * Warnings go to stderr unless gQuiet is set. */
{
    struct genbankCds cds;
    char cdsBuf[4096];
    char *cdsStr;

    ZeroVar(&cds);

    if (gNoCds)
    {
        cds.start = -1;
        cds.end = -1;
        cds.startComplete = FALSE;
        cds.endComplete = FALSE;
        return cds;
    }

    if (gAllCds)
    {
        cds.start = psl->qStart;
        cds.end = psl->qEnd;
        if (psl->strand[0] == '-')
            reverseIntRange(&cds.start, &cds.end, psl->qSize);
        cds.startComplete = TRUE;
        cds.endComplete = TRUE;
        return cds;
    }

    cdsStr = getCdsForAcc(conn, psl->qName, cdsBuf, sizeof(cdsBuf));
    if (cdsStr == NULL)
    {
        if (!gQuiet)
            fprintf(stderr, "Warning: no CDS for %s\n", psl->qName);
        cds.start = cds.end = -1;
        return cds;
    }

    if (!genbankCdsParse(cdsStr, &cds))
    {
        if (!gQuiet)
            fprintf(stderr, "Warning: invalid CDS for %s: %s\n",
                    psl->qName, cdsStr);
        return cds;
    }

    /* a CDS cannot be longer than the sequence it annotates */
    if ((cds.end-cds.start) > psl->qSize)
    {
        if (!gQuiet)
            fprintf(stderr, "Warning: CDS for %s (%u..%u) longer than qSize (%u)\n",
                    psl->qName, cds.start, cds.end, psl->qSize);
        cds.start = cds.end = -1;
    }
    return cds;
}
Esempio n. 28
0
off_t fileSize(char *pathname)
/* Return the size of the file at pathname as reported by stat(), or -1
 * when stat() fails (for example because the file does not exist). */
{
struct stat info;

ZeroVar(&info);
return (stat(pathname, &info) == -1) ? -1 : info.st_size;
}
Esempio n. 29
0
struct oneStat *totalIntron(struct blatStats *stats)
/* Return sum of all intron regions. */
{
static struct oneStat acc;
ZeroVar(&acc);
addStat(&stats->firstIntron, &acc);
addStat(&stats->middleIntron, &acc);
addStat(&stats->endIntron, &acc);
addStat(&stats->onlyIntron, &acc);
return &acc;
}
Esempio n. 30
0
struct hashCookie gbIgnoreFirst(struct gbIgnore *ignore)
/* Get a cookie for iterating over the accession hash of ignore.  When
 * there is no hash an all-zero cookie is returned; per the original note,
 * hashNext() returns NULL for a zero cookie. */
{
struct hashCookie cookie;

if (ignore->accHash != NULL)
    return hashFirst(ignore->accHash);
ZeroVar(&cookie);
return cookie;
}