void udpCountServer(char *portName)
/* udpCountServer - A UDP server that answers each datagram with a steadily
 * increasing count.
 *
 * portName - decimal port number to listen on (converted with atoi).
 *
 * Every datagram that is exactly one struct countMessage long is answered
 * with a countMessage holding: the time since the server started (as
 * computed by timeDiff), the sender's time echoed back, the running count,
 * and the sender's message value plus 256.  Short or failed reads are
 * warned about and skipped.  The server stops after answering a request
 * whose message field is zero.  Aborts if the socket can't be created or
 * bound. */
{
    int port = atoi(portName);
    int ear;
    int count = 0;
    struct timeval startTime, tv;
    struct countMessage sendMessage, receiveMessage;
    struct sockaddr_in sai;

    ZeroVar(&sai);
    sai.sin_family = AF_INET;
    sai.sin_port = htons(port);
    sai.sin_addr.s_addr = INADDR_ANY;

    ear = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (ear < 0)
        errAbort("Couldn't open ear: %s", strerror(errno));
    if (bind(ear, (struct sockaddr *)&sai, sizeof(sai)) < 0)
        errAbort("Couldn't bind ear");

    gettimeofday(&startTime, NULL);
    for (;;) {
        ssize_t got;
        /* recvfrom takes a socklen_t in/out length, not an int. */
        socklen_t saiSize = sizeof(sai);

        ZeroVar(&sai);
        sai.sin_family = AF_INET;
        got = recvfrom(ear, &receiveMessage, sizeof(receiveMessage), 0,
                       (struct sockaddr *)&sai, &saiSize);
        if (got < 0) {
            warn("couldn't receive %s", strerror(errno));
            continue;
        }
        if ((size_t)got != sizeof(receiveMessage)) {
            warn("Message truncated");
            continue;
        }

        gettimeofday(&tv, NULL);
        sendMessage.time = timeDiff(&startTime, &tv);
        sendMessage.echoTime = receiveMessage.time;
        sendMessage.count = ++count;
        sendMessage.message = receiveMessage.message + 256;
        /* Reply to the address recvfrom filled in. */
        if (sendto(ear, &sendMessage, sizeof(sendMessage), 0,
                   (struct sockaddr *)&sai, saiSize) < 0)
            warn("couldn't send %s", strerror(errno));

        /* A zero message from the client is the shutdown request. */
        if (!receiveMessage.message)
            break;
    }
    close(ear);
    printf("All done after %d\n", count);
}
struct gbAlignInfo gbAlignFindNeedAligned(struct gbSelect* select, struct gbSelect* prevSelect) /* Find entries that need to be aligned or migrated for an update. * If prevSelect is not null, and select indicates the full update, * alignments will be flagged for migration if possible. Only the * update in select is processed, however alignments for any * of the prevSelect updates can be flagged for migration. * * If an entry is selected for migration: * prevEntry.clientFlags |= MIGRATE_FLAG * entry.clientFlags |= MIGRATE_FLAG * update.selectAligns |= GB_NATIVE or GB_XENO * * If an entry is selected for alignment: * entry.clientFlags |= MIGRATE_FLAG * update.selectProc |= GB_NATIVE or GB_XENO * Returns counts of entries to align or migrate. */ { struct gbAlignInfo alignInfo; struct gbProcessed* processed; ZeroVar(&alignInfo); /* visit all processed entries for this update */ for (processed = select->update->processed; processed != NULL; processed = processed->updateLink) { /* this will always select entries if this is the full update */ if (needAlignedSelect(select, processed->entry)) flagNeedAligned(select, prevSelect, processed, &alignInfo); } return alignInfo; }
void stsMapFromStsMarker(struct stsMarker *oldEl, struct stsMap *el)
/* Fill in *el (stsMap format) from *oldEl (older stsMarker format).
 * *el is zeroed first; every value is assigned directly, so any pointer
 * fields end up referring to the same data as oldEl rather than a copy.
 * The wiRh, decode and lab fields are given fixed placeholder values. */
{
    ZeroVar(el);

    /* Position, name and identifiers. */
    el->chrom = oldEl->chrom;
    el->chromStart = oldEl->chromStart;
    el->chromEnd = oldEl->chromEnd;
    el->name = oldEl->name;
    el->score = oldEl->score;
    el->identNo = oldEl->identNo;
    el->ctgAcc = oldEl->ctgAcc;
    el->otherAcc = oldEl->otherAcc;

    /* Per-map chromosome/position pairs. */
    el->genethonChrom = oldEl->genethonChrom;
    el->genethonPos = oldEl->genethonPos;
    el->marshfieldChrom = oldEl->marshfieldChrom;
    el->marshfieldPos = oldEl->marshfieldPos;
    el->gm99Gb4Chrom = oldEl->gm99Gb4Chrom;
    el->gm99Gb4Pos = oldEl->gm99Gb4Pos;
    el->shgcG3Chrom = oldEl->shgcG3Chrom;
    el->shgcG3Pos = oldEl->shgcG3Pos;
    el->wiYacChrom = oldEl->wiYacChrom;
    el->wiYacPos = oldEl->wiYacPos;
    el->shgcTngChrom = oldEl->shgcTngChrom;
    el->shgcTngPos = oldEl->shgcTngPos;

    /* FISH / band information. */
    el->fishChrom = oldEl->fishChrom;
    el->beginBand = oldEl->beginBand;
    el->endBand = oldEl->endBand;

    /* Fields that are not copied from oldEl. */
    el->wiRhChrom = "0";
    el->wiRhPos = 0;
    el->decodeChrom = "0";
    el->decodePos = 0;
    el->lab = "-";
}
static time_t gbParseHumanTimeStamp(char *col, boolean *isOkRet)
/* Parse a time stamp, in "2004-11-01 01:06:18" format, as returned
 * by mysql timestamp columns 4.1 or later.
 *
 * col     - text to parse; must be exactly 19 characters long.
 * isOkRet - receives FALSE if the length or a separator is wrong or if
 *           mktime() fails; parseUnsigned is also handed the flag
 *           (presumably to clear it on bad digits - confirm in its
 *           definition).  Otherwise receives TRUE.
 * Returns the mktime() conversion of the parsed fields, or (time_t)-1 when
 * the string has the wrong length or mktime() fails. */
{
    boolean isOk = TRUE;
    struct tm tm;
    time_t numTime;

    /* Stop before touching fixed offsets: a shorter string would otherwise
     * be indexed past its terminator. */
    if (strlen(col) != 19) {
        *isOkRet = FALSE;
        return (time_t)-1;
    }

    ZeroVar(&tm);
    tm.tm_year = parseUnsigned(col, 0, 4, &isOk) - 1900;
    if (col[4] != '-')
        isOk = FALSE;
    /* struct tm months run 0..11, the text uses 1..12. */
    tm.tm_mon = parseUnsigned(col, 5, 2, &isOk) - 1;
    if (col[7] != '-')
        isOk = FALSE;
    tm.tm_mday = parseUnsigned(col, 8, 2, &isOk);
    if (col[10] != ' ')
        isOk = FALSE;
    tm.tm_hour = parseUnsigned(col, 11, 2, &isOk);
    if (col[13] != ':')
        isOk = FALSE;
    tm.tm_min = parseUnsigned(col, 14, 2, &isOk);
    if (col[16] != ':')
        isOk = FALSE;
    tm.tm_sec = parseUnsigned(col, 17, 2, &isOk);

    /* convert */
    numTime = mktime(&tm);
    if (numTime == (time_t)-1)
        isOk = FALSE;
    *isOkRet = isOk;
    return numTime;
}
void wigEncode(char *bedFile, char *wigFile, char *wibFile) /* Convert BED file to wiggle binary representation */ { double upper=wigEncodeStartingUpperLimit, lower=wigEncodeStartingLowerLimit; if ((lift != 0) || noOverlap || noOverlapSpanData || (wibSizeLimit > 0)) { struct wigEncodeOptions options; ZeroVar(&options); /* make sure everything is zero */ options.lift = lift; options.noOverlap = noOverlap; options.flagOverlapSpanData = noOverlapSpanData; options.wibSizeLimit = wibSizeLimit; wigAsciiToBinary(bedFile, wigFile, wibFile, &upper, &lower, &options); if ((wibSizeLimit > 0) && (options.wibSizeLimit >= wibSizeLimit)) verbose(1,"#\twarning, reached wiggle size limits, %lld vs. %lld\n", wibSizeLimit, options.wibSizeLimit); } else wigAsciiToBinary(bedFile, wigFile, wibFile, &upper, &lower, NULL); if ( (wigEncodeStartingUpperLimit == upper) && (wigEncodeStartingLowerLimit == lower) ) errAbort("ERROR: wigEncode: empty input file: '%s'", bedFile ); verbose(1, "Converted %s, upper limit %.2f, lower limit %.2f\n", bedFile, upper, lower); }
static struct featBounds getFeatures(struct genePred *gp, int iExon) /* get the bounds of the features within an exon */ { int start = gp->exonStarts[iExon]; int end = gp->exonEnds[iExon]; struct featBounds fb; ZeroVar(&fb); if (start < gp->cdsStart) { /* has initial UTR */ struct range *utr = (gp->strand[0] == '+') ? &fb.utr5 : &fb.utr3; utr->start = start; utr->end = (end < gp->cdsStart) ? end : gp->cdsStart; start = utr->end; } if ((gp->cdsStart < end) && (gp->cdsEnd > start)) { /* has CDS */ fb.cds.start = start; fb.cds.end = (end < gp->cdsEnd) ? end : gp->cdsEnd; start = fb.cds.end; } if (start >= gp->cdsEnd) { /* has terminal UTR */ struct range *utr = (gp->strand[0] == '+') ? &fb.utr3 : &fb.utr5; utr->start = start; utr->end = end; } return fb; }
struct frag *readFragList(char *fileName) /* Read list of frags from file. */ { struct frag *list = NULL, *frag; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dnaSeq seq; char *s; int fragIx; struct hash *chromHash = newHash(5); ZeroVar(&seq); printf("Reading %s\n", fileName); while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { AllocVar(frag); frag->name = cloneString(seq.name); s = strrchr(seq.name, '_'); if (s == NULL || !isdigit(s[1])) errAbort("Expecting _ and number in %s", seq.name); fragIx = atoi(s+1); frag->chrom = "chr14"; frag->start = fragIx*1000; frag->end = frag->start + 1000; slAddHead(&list, frag); } lineFileClose(&lf); printf("Read %d fragments from %s\n", slCount(list), fileName); slReverse(&list); return list; }
void splitByRecord(char *inName, int splitCount, char *outRoot, off_t estSize)
/* Split the fasta file inName into numbered output files without breaking
 * up records.  A running total of sequence sizes is kept; whenever it
 * passes the current boundary, the current output is closed, the next
 * output path is built by mkOutPath, and a new boundary is obtained from
 * calcNextEnd(fileCount, splitCount, estSize). */
{
    struct dnaSeq seq;
    struct lineFile *lf = lineFileOpen(inName, TRUE);
    int digits = digitsBaseTen(splitCount);
    off_t boundary = 0;     /* total size at which the next file starts */
    off_t total = 0;        /* sequence sizes read so far */
    int fileCount = 0;
    FILE *out = NULL;
    char outPath[PATH_LEN];

    ZeroVar(&seq);
    while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) {
        total += seq.size;
        if (total > boundary) {
            /* Roll over to the next output file. */
            carefulClose(&out);
            mkOutPath(outPath, outRoot, digits, fileCount++);
            verbose(2, "writing %s\n", outPath);
            out = mustOpen(outPath, "w");
            boundary = calcNextEnd(fileCount, splitCount, estSize);
        }
        faWriteNext(out, seq.name, seq.dna, seq.size);
    }
    carefulClose(&out);
    lineFileClose(&lf);
}
void samToOpenBed(char *samIn, FILE *f) /* Like samToOpenBed, but the output is the already open file f. */ { samfile_t *sf = samopen(samIn, "r", NULL); bam_header_t *bamHeader = sf->header; bam1_t one; ZeroVar(&one); int err; while ((err = samread(sf, &one)) >= 0) { int32_t tid = one.core.tid; if (tid < 0) continue; char *chrom = bamHeader->target_name[tid]; // Approximate here... can do better if parse cigar. int start = one.core.pos; int size = one.core.l_qseq; int end = start + size; boolean isRc = (one.core.flag & BAM_FREVERSE); char strand = '+'; if (isRc) { strand = '-'; reverseIntRange(&start, &end, bamHeader->target_len[tid]); } fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand); } if (err < 0 && err != -1) errnoAbort("samread err %d", err); samclose(sf); }
void migrateAligned(struct gbSelect* select, struct gbSelect* prevSelect, struct gbAlignInfo* alignInfo, struct outputFiles* out, struct recCounts* recCounts) /* Migrate existing aligned PSLs from an earlier release. */ { int orgCatIdx = gbOrgCatIdx(select->orgCats); struct gbUpdate* prevUpdateHold = prevSelect->update; struct gbUpdate* prevUpdate; struct migrateAligns migrate; ZeroVar(&migrate); migrate.select = select; migrate.prevSelect = prevSelect; /* traverse all updates in the previous release */ gbVerbEnter(1, "migrating alignments"); for (prevUpdate = prevSelect->release->updates; prevUpdate != NULL; prevUpdate = prevUpdate->next) { prevSelect->update = prevUpdate; migrateAlignedUpdate(prevSelect, &migrate, out, recCounts); } prevSelect->update = prevUpdateHold; recCountsSum(recCounts, &migrate.counts); if (migrate.counts.pslCnts.recCnt[orgCatIdx] != alignInfo->migrate.recCnt[orgCatIdx]) errAbort("expected to migrate %d %s PSLs, found %d", alignInfo->migrate.recCnt[orgCatIdx], gbOrgCatName(select->orgCats), migrate.counts.pslCnts.recCnt[orgCatIdx]); gbVerbLeave(1, "migrating alignments"); }
void polyInfo(char *pslFile, char *genoFile, char *estFile, char *outputFile) /* polyInfo - Collect info on polyAdenylation signals etc. */ { struct hash *pslHash = NULL; struct hash *genoHash = loadGeno(genoFile); static struct dnaSeq est; struct lineFile *lf = NULL; FILE *f = NULL; pslHash = pslIntoHash(pslFile); lf = lineFileOpen(estFile, TRUE); f = mustOpen(outputFile, "w"); while (faSpeedReadNext(lf, &est.dna, &est.size, &est.name)) { struct pslList *pl; struct psl *psl; struct estOrientInfo ei; if ((pl = hashFindVal(pslHash, est.name)) != NULL) { for (psl = pl->list; psl != NULL; psl = psl->next) { struct dnaSeq *geno = hashMustFindVal(genoHash, psl->tName); if (psl->tSize != geno->size) errAbort("psl generated on a different version of the genome"); ZeroVar(&ei); fillInEstInfo(&ei, &est, geno, psl); estOrientInfoTabOut(&ei, f); } } } }
static void pslCDnaFilter(char *inPsl, char *outPsl) /* filter cDNA alignments in psl format */ { struct outFiles outFiles; ZeroVar(&outFiles); outFiles.passFh = mustOpen(outPsl, "w"); if (gDropped != NULL) outFiles.dropFh = mustOpen(gDropped, "w"); if (gWeirdOverlappped != NULL) outFiles.weirdOverFh = mustOpen(gWeirdOverlappped, "w"); if (gHapRefMapped != NULL) outFiles.hapRefMappedFh = mustOpen(gHapRefMapped, "w"); if (gHapRefCDnaAlns != NULL) outFiles.hapRefCDnaAlnsFh = mustOpen(gHapRefCDnaAlns, "w"); if (gHapLociAlns != NULL) outFiles.hapLociAlnsFh = mustOpen(gHapLociAlns, "w"); struct hapRegions *hapRegions = (gHapRegions == NULL) ? NULL : hapRegionsNew(gHapRegions, outFiles.hapRefMappedFh, outFiles.hapRefCDnaAlnsFh); struct cDnaReader *reader = cDnaReaderNew(inPsl, gCDnaOpts, gPolyASizes, hapRegions); while (cDnaReaderNext(reader)) filterQuery(reader->cdna, hapRegions, &outFiles); carefulClose(&outFiles.hapRefMappedFh); carefulClose(&outFiles.hapRefCDnaAlnsFh); carefulClose(&outFiles.hapLociAlnsFh); carefulClose(&outFiles.dropFh); carefulClose(&outFiles.weirdOverFh); carefulClose(&outFiles.passFh); cDnaStatsPrint(&reader->stats, 1); hapRegionsFree(&hapRegions); cDnaReaderFree(&reader); }
void splitNcbiFa(char *ncbiIn, char *outDir) /* splitNcbiFa - Split up NCBI format fa file into UCSC formatted ones.. */ { struct lineFile *lf = lineFileOpen(ncbiIn, TRUE); static struct dnaSeq seq; ZeroVar(&seq); makeDir(outDir); while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { FILE *f; char fileName[512]; char *row[5]; int wordCount; char ourName[129]; char cloneName[128]; wordCount = chopByChar(seq.name, '|', row, ArraySize(row)); if (wordCount != 5) errAbort("Expecting 5 | separated fields line %d of %s", lf->lineIx, lf->fileName); strcpy(cloneName, row[3]); chopSuffix(cloneName); sprintf(fileName, "%s/%s.fa", outDir, cloneName); sprintf(ourName, "%s_1", row[3]); faWrite(fileName, ourName, seq.dna, seq.size); } }
static void addGffLineFromBed(struct bed *bed, char *source, char *feature,
                              int start, int end, char frame, char *txName)
/* Create a gffLine from a bed and line-specific parameters and print it
 * to stdout.  The strand is '.' unless the bed's strand starts with '+'
 * or '-'.  When the bed has no name, the gene id is "gene<N>", where N is
 * a counter kept across calls. */
{
    static int namelessIx = 0;   /* number of nameless beds seen so far */
    struct gffLine gff;
    char generatedId[64];
    char strand = bed->strand[0];

    ZeroVar(&gff);
    gff.seq = bed->chrom;
    gff.source = source;
    gff.feature = feature;
    gff.start = start;
    gff.end = end;
    gff.score = bed->score;

    gff.strand = (strand == '+' || strand == '-') ? strand : '.';
    gff.frame = frame;
    gff.group = txName;

    if (bed->name == NULL) {
        /* generatedId stays valid until gffTabOut below has run. */
        safef(generatedId, sizeof(generatedId), "gene%d", ++namelessIx);
        gff.geneId = generatedId;
    } else {
        gff.geneId = bed->name;
    }
    gffTabOut(&gff, stdout);
}
void processOneGraph(struct txGraph *txg, struct hash *weightHash, double threshold, char *outType, FILE *f) /* Write out edges for one graph. */ { struct txEdge *edge; struct txEdgeBed e; ZeroVar(&e); e.chrom = txg->tName; for (edge = txg->edgeList; edge != NULL; edge = edge->next) { double weight = weightOfEvidence(txg, edge->evList, weightHash); if (weight >= threshold) { struct txVertex *start = &txg->vertices[edge->startIx]; struct txVertex *end = &txg->vertices[edge->endIx]; e.chromStart = start->position; e.chromEnd = end->position; e.name = outType; e.score = edge->evCount; e.strand[0] = txg->strand[0]; e.startType[0] = ggVertexTypeAsString(start->type)[0]; e.type = edge->type; e.endType[0] = ggVertexTypeAsString(end->type)[0]; txEdgeBedTabOut(&e, f); } } }
void splitAbout(char *inName, off_t approxSize, char *outRoot)
/* Split into chunks of about approxSize.  Don't break up
 * sequence though.  A new output file (named by mkOutPath with a
 * two-digit counter) is started whenever the sizes written to the current
 * one have reached approxSize. */
{
    struct dnaSeq seq;
    struct lineFile *lf = lineFileOpen(inName, TRUE);
    int digits = 2;
    /* Starts "full" so the first record opens the first file. */
    off_t written = approxSize;
    int fileCount = 0;
    FILE *out = NULL;
    char outPath[PATH_LEN];

    ZeroVar(&seq);
    while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) {
        if (written >= approxSize) {
            carefulClose(&out);
            written = 0;
            mkOutPath(outPath, outRoot, digits, fileCount++);
            verbose(2, "writing %s\n", outPath);
            out = mustOpen(outPath, "w");
        }
        written += seq.size;
        faWriteNext(out, seq.name, seq.dna, seq.size);
    }
    carefulClose(&out);
    lineFileClose(&lf);
}
struct gbRelease* loadIndex(char* relName, unsigned types, char* database) /* load processed section of index for release */ { struct gbIndex* index = gbIndexNew(database, NULL); struct gbSelect select; ZeroVar(&select); select.release = gbIndexMustFindRelease(index, relName); if (types & GB_MRNA) { select.type = GB_MRNA; gbReleaseLoadProcessed(&select); } if ((types & GB_EST) && (select.release->srcDb == GB_GENBANK)) { struct slName* prefixes, *prefix; select.type = GB_EST; prefixes = gbReleaseGetAccPrefixes(select.release, GB_PROCESSED, GB_EST); for (prefix = prefixes; prefix != NULL; prefix = prefix->next) { select.accPrefix = prefix->name; gbReleaseLoadProcessed(&select); } select.accPrefix = NULL; slFreeList(&prefixes); } return select.release; }
void splitByNamePrefix(char *inName, char *outRoot, int preFixCount)
/* Split into chunks using prefix of sequence names.
 * Each record of inName is appended to "<outDir><prefix>.fa", where outDir
 * is the directory part of outRoot as produced by splitPath and prefix is
 * the first preFixCount characters of the record name.  preFixCount must be
 * non-negative and smaller than the prefix buffer. */
{
    struct dnaSeq seq;
    struct lineFile *lf = lineFileOpen(inName, TRUE);
    FILE *f = NULL;
    char outDir[256], outFile[128], ext[64], preFix[512];
    /* Big enough for a full outDir plus a full prefix plus ".fa", so the
     * formatted path cannot overrun the buffer. */
    char outPath[sizeof(outDir) + sizeof(preFix) + 4];

    ZeroVar(&seq);
    splitPath(outRoot, outDir, outFile, ext);
    /* Negative counts are rejected explicitly instead of relying on the
     * signed-to-unsigned conversion in the comparison. */
    assert(preFixCount >= 0 && (size_t)preFixCount < sizeof(preFix));
    while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) {
        carefulClose(&f);
        strncpy(preFix, seq.name, preFixCount);
        preFix[preFixCount] = '\0';
        snprintf(outPath, sizeof(outPath), "%s%s.fa", outDir, preFix);
        verbose(2, "writing %s\n", outPath);
        /* Append, since several records may share one prefix. */
        f = mustOpen(outPath, "a");
        faWriteNext(f, seq.name, seq.dna, seq.size);
    }
    carefulClose(&f);
    lineFileClose(&lf);
}
void chainPair(struct seqPair *sp, struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct chain **pChainList, FILE *details) /* Chain up blocks and output. */ { struct chain *chainList, *chain, *next; struct cBlock *b; long startTime, dt; int size = 0; struct chainConnect cc; verbose(1, "chainPair %s\n", sp->name); /* Set up info for connect function. */ ZeroVar(&cc); cc.query = qSeq; cc.target = tSeq; cc.ss = scoreScheme; cc.gapCalc = gapCalc; /* Score blocks. */ for (b = sp->blockList; b != NULL; b = b->next) { size = b->qEnd - b->qStart; checkBlockRange("query", qSeq, b->qStart, b->qEnd); checkBlockRange("target", tSeq, b->tStart, b->tEnd); b->score = axtScoreUngapped(scoreScheme, qSeq->dna + b->qStart, tSeq->dna + b->tStart, size); } /* Get chain list and clean it up a little. */ startTime = clock1000(); chainList = chainBlocks(sp->qName, qSeq->size, sp->qStrand, sp->tName, tSeq->size, &sp->blockList, (ConnectCost)chainConnectCost, (GapCost)chainConnectGapCost, &cc, details); dt = clock1000() - startTime; verbose(1, "Main chaining step done in %ld milliseconds\n", dt); for (chain = chainList; chain != NULL; chain = chain->next) { chainRemovePartialOverlaps(chain, qSeq, tSeq, scoreScheme->matrix); chainMergeAbutting(chain); chain->score = chainCalcScore(chain, scoreScheme, gapCalc, qSeq, tSeq); } /* Move chains scoring over threshold to master list. */ for (chain = chainList; chain != NULL; chain = next) { next = chain->next; if (chain->score >= minScore) { slAddHead(pChainList, chain); } else { chainFree(&chain); } } }
int main(int argc, char* argv[]) { char *relName, *updateName, *typeAccPrefix, *database, *sep; struct gbIndex* index; struct gbSelect select; struct gbSelect* prevSelect = NULL; struct gbAlignInfo alignInfo; boolean noMigrate; ZeroVar(&select); optionInit(&argc, argv, optionSpecs); if (argc != 5) usage(); maxFaSize = optionInt("fasize", -1); workDir = optionVal("workdir", "work/align"); noMigrate = optionExists("noMigrate"); createPolyASizes = optionExists("polyASizes"); gbVerbInit(optionInt("verbose", 0)); relName = argv[1]; updateName = argv[2]; typeAccPrefix = argv[3]; database = argv[4]; /* parse typeAccPrefix */ sep = strchr(typeAccPrefix, '.'); if (sep != NULL) *sep = '\0'; select.type = gbParseType(typeAccPrefix); if (sep != NULL) { select.accPrefix = sep+1; *sep = '.'; } select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno")); index = gbIndexNew(database, NULL); select.release = gbIndexMustFindRelease(index, relName); select.update = gbReleaseMustFindUpdate(select.release, updateName); gbVerbMsg(0, "gbAlignGet: %s/%s/%s/%s", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix); /* Get the release to migrate, if applicable */ if (!noMigrate) prevSelect = gbAlignGetMigrateRel(&select); alignInfo = gbAlignGet(&select, prevSelect); /* always print stats */ fprintf(stderr, "gbAlignGet: %s/%s/%s/%s: align=%d, migrate=%d\n", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix, alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt); gbIndexFree(&index); /* print alignment and migrate count, which is read by the driver program */ printf("alignCnt: %d %d\n", alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt); return 0; }
void faToTwoBit(char *inFiles[], int inFileCount, char *outFile) /* Convert inFiles in fasta format to outfile in 2 bit * format. */ { struct twoBit *twoBitList = NULL, *twoBit; int i; struct hash *uniqHash = newHash(18); FILE *f; for (i=0; i<inFileCount; ++i) { char *fileName = inFiles[i]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dnaSeq seq; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { if (seq.size == 0) { warn("Skipping item %s which has no sequence.\n",seq.name); continue; } /* strip off version number */ if (stripVersion) { char *sp = NULL; sp = strchr(seq.name,'.'); if (sp != NULL) *sp = '\0'; } if (hashLookup(uniqHash, seq.name)) { if (!ignoreDups) errAbort("Duplicate sequence name %s", seq.name); else continue; } hashAdd(uniqHash, seq.name, NULL); if (noMask) faToDna(seq.dna, seq.size); else unknownToN(seq.dna, seq.size); twoBit = twoBitFromDnaSeq(&seq, !noMask); slAddHead(&twoBitList, twoBit); } lineFileClose(&lf); } slReverse(&twoBitList); f = mustOpen(outFile, "wb"); twoBitWriteHeader(twoBitList, f); for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { twoBitWriteOne(twoBit, f); } carefulClose(&f); }
void gapSplit(char *input, char *output) /* gapSplit - split sequence on gaps of size N. */ { struct lineFile *lf = lineFileOpen(input,TRUE); FILE *f = mustOpen(output, "w"); struct dnaSeq seq; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { struct dyString *seqName = dyStringNew(0); int pos = 0; int pieceCount = 0; int charLineCount = 0; dyStringPrintf(seqName,"%s_%d", seq.name, pieceCount++); fprintf(f,">%s\n", seqName->string); while (pos < seq.size) { int gapSize = 0; int gapStart = 0; fprintf(f,"%c",seq.dna[pos]); ++charLineCount; if (!(charLineCount % 60)) { fprintf(f,"\n"); charLineCount = 0; } ++pos; gapStart = pos; /* remember where possible gap starts */ /* see if gap is entered */ if ('n' == seq.dna[pos] || 'N' == seq.dna[pos]) { /* enter gap, size it */ while (pos < seq.size && ('n' == seq.dna[pos] || 'N' == seq.dna[pos])) { ++gapSize; ++pos; } } /* valid gap size to split here ? */ if ((gapSize >= minGap) && (pos < seq.size)) { if (charLineCount % 60) { fprintf(f,"\n"); charLineCount = 0; } dyStringClear(seqName); dyStringPrintf(seqName,"%s_%d", seq.name, pieceCount++); fprintf(f,">%s\n", seqName->string); } else if (gapSize < minGap) { while (gapSize-- > 0) { fprintf(f,"%c",seq.dna[gapStart++]); ++charLineCount; if (!(charLineCount % 60)) {fprintf(f,"\n"); charLineCount = 0;} } } } if (charLineCount % 60) { fprintf(f,"\n"); } } }
boolean bamIsSortedByTarget(char *fileName, int maxToCheck) /* Return TRUE if bam is sorted by target for at least the first bits. */ { int leftToCheck = maxToCheck; struct hash *targetHash = hashNew(0); boolean result = TRUE; /* Open bam/sam file and set up basic I/O vars on it. */ samfile_t *sf = samopen(fileName, "rb", NULL); bam_header_t *bamHeader = sf->header; bam1_t one; ZeroVar(&one); int err; char lastTarget[PATH_LEN] = ""; int lastPos = 0; /* Loop through while still haven't hit our max and file still has data */ while ((err = bam_read1(sf->x.bam, &one)) >= 0) { if (--leftToCheck < 0) { break; } /* Get target, skipping read if it's not aligned well enough to have a target. */ int32_t tid = one.core.tid; if (tid < 0) continue; char *target = bamHeader->target_name[tid]; int pos = one.core.pos; /* If we are on same target then make sure we are in ascending order. */ if (sameString(target, lastTarget)) { if (pos < lastPos) { result = FALSE; break; } } else { /* If sorted should not go back to a new chromosome. Use hash to check this */ if (hashLookup(targetHash, target)) { result = FALSE; break; } hashAdd(targetHash, target, NULL); safef(lastTarget, sizeof(lastTarget), "%s", target); } lastPos = pos; } hashFree(&targetHash); return result; }
struct oneStat *totalUtr(struct blatStats *stats) /* Return sum of 5' and 3' UTRs. */ { static struct oneStat acc; ZeroVar(&acc); addStat(&stats->utr5, &acc); addStat(&stats->utr3, &acc); return &acc; }
struct chromAnnMapIter chromAnnMapFirst(struct chromAnnMap *cam) /* get iterator over a chromAnnMap */ { struct chromAnnMapIter iter; ZeroVar(&iter); iter.cam = cam; iter.chromCookie = hashFirst(cam->ranges->hash); return iter; }
struct oneStat *totalSplice(struct blatStats *stats) /* Return sum of 5' and 3' splice sites. */ { static struct oneStat acc; ZeroVar(&acc); addStat(&stats->splice5, &acc); addStat(&stats->splice3, &acc); return &acc; }
struct genbankCds getCds(struct sqlConnection *conn, struct psl *psl) /* Lookup the CDS, either in the database or hash, or generate for query. If * not found and looks like a it has a genbank version, try without the * version. If allCds is true, generate a cds that covers the query. Conn * maybe null if gCdsTable exists or gAllCds or gNoCds are true. If CDS can't be * obtained, start and end are both set to -1. If there is an error parsing * it, start and end are both set to 0. */ { struct genbankCds cds; ZeroVar(&cds); if (gNoCds) { cds.start = -1; cds.end = -1; cds.startComplete = FALSE; cds.endComplete = FALSE; } else if (gAllCds) { cds.start = psl->qStart; cds.end = psl->qEnd; if (psl->strand[0] == '-') reverseIntRange(&cds.start, &cds.end, psl->qSize); cds.startComplete = TRUE; cds.endComplete = TRUE; } else { char cdsBuf[4096]; char *cdsStr = getCdsForAcc(conn, psl->qName, cdsBuf, sizeof(cdsBuf)); if (cdsStr == NULL) { if (!gQuiet) fprintf(stderr, "Warning: no CDS for %s\n", psl->qName); cds.start = cds.end = -1; } else { if (!genbankCdsParse(cdsStr, &cds)) { if (!gQuiet) fprintf(stderr, "Warning: invalid CDS for %s: %s\n", psl->qName, cdsStr); } else if ((cds.end-cds.start) > psl->qSize) { if (!gQuiet) fprintf(stderr, "Warning: CDS for %s (%u..%u) longer than qSize (%u)\n", psl->qName, cds.start, cds.end, psl->qSize); cds.start = cds.end = -1; } } } return cds; }
off_t fileSize(char *pathname)
/* Get file size for pathname, as reported by stat().
 * Returns -1 if stat() fails. */
{
    struct stat info;

    ZeroVar(&info);
    return (stat(pathname, &info) == -1) ? -1 : info.st_size;
}
struct oneStat *totalIntron(struct blatStats *stats) /* Return sum of all intron regions. */ { static struct oneStat acc; ZeroVar(&acc); addStat(&stats->firstIntron, &acc); addStat(&stats->middleIntron, &acc); addStat(&stats->endIntron, &acc); addStat(&stats->onlyIntron, &acc); return &acc; }
struct hashCookie gbIgnoreFirst(struct gbIgnore *ignore) /* get cookie to iterate over hash */ { // a zero cookie will return NULL on hashNext() call struct hashCookie cookie; if (ignore->accHash == NULL) ZeroVar(&cookie); else cookie = hashFirst(ignore->accHash); return cookie; }