void faPolyASizes(char *inFile, char *outFile)
/* faPolyASizes - get poly-A tail sizes.
 * Reads records from inFile one at a time with faSomeSpeedReadNext and
 * writes one tab-separated line per record to outFile: name, sequence
 * size, the value returned by maskTailPolyA and the value returned by
 * maskHeadPolyT. */
{
DNA *seq;
int size;
char *name;
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");

/* NOTE(review): neither lf nor f is closed in this function - presumably
 * the program exits right after it returns; confirm against the caller. */
while (faSomeSpeedReadNext(lf, &seq, &size, &name, FALSE))
    {
    /* Both helpers are handed the same buffer and may rewrite it.  C does
     * not specify the order in which function arguments are evaluated, so
     * the calls are made as separate statements (tail first, then head)
     * to report the same numbers with every compiler. */
    int tailSize = maskTailPolyA(seq, size);
    int headSize = maskHeadPolyT(seq, size);
    fprintf(f, "%s\t%d\t%d\t%d\n", name, size, tailSize, headSize);
    }
}
void polyTrimSeq(struct dnaSeq *seq, FILE *fh)
/* Trim seq in place as selected by the trimPolyA and trimPolyT settings,
 * then pass what is left (name, bases, size) to faWriteNext on fh.
 * NOTE(review): with trimPolyT set, seq->dna is advanced past the trimmed
 * head, so afterwards it may no longer point at the start of the original
 * buffer - confirm the caller does not free or reuse seq->dna. */
{
int cut;

if (trimPolyA)
    {
    /* Shorten the sequence by the tail length and re-terminate it. */
    cut = maskTailPolyA(seq->dna, seq->size);
    seq->size = seq->size - cut;
    seq->dna[seq->size] = '\0';
    }

if (trimPolyT)
    {
    /* Step the start of the sequence past the head and shrink the size. */
    cut = maskHeadPolyT(seq->dna, seq->size);
    seq->dna = seq->dna + cut;
    seq->size = seq->size - cut;
    }

faWriteNext(fh, seq->name, seq->dna, seq->size);
}
void trimSeq(struct dnaSeq *seq, struct dnaSeq *trimmed)
/* Shallow-copy seq into trimmed, then apply the polyT-head and polyA-tail
 * handling selected by trimT, trimA and trimHardA.  The base buffer is
 * shared between seq and trimmed.  Only trimHardA shortens trimmed->size
 * and terminates the string early; the other modes just run the mask
 * helpers over the buffer. */
{
DNA *bases = seq->dna;
int baseCount = seq->size;
int tailSize;

*trimmed = *seq;

if (trimT)
    maskHeadPolyT(bases, baseCount);

/* Nothing more to do unless some form of polyA handling was requested. */
if (!trimA && !trimHardA)
    return;

tailSize = maskTailPolyA(bases, baseCount);
if (trimHardA)
    {
    trimmed->size -= tailSize;
    bases[baseCount - tailSize] = 0;
    }
}
void outFaWrite(struct outFa* outFa, struct gbFa* inFa) /* write a record to the output fasta, open or switch to a new file * if needed. */ { if ((maxFaSize > 0) && (outFa->fa != NULL) && (outFa->fa->off > maxFaSize)) outFaClose(outFa); if (outFa->fa == NULL) outFaOpen(outFa); gbFaWriteFromFa(outFa->fa, inFa, NULL); outFa->numSeqs++; outFa->numBases += inFa->seqLen; if (outFa->polyAFh != NULL) { /* note, this modifies the fasta sequence, but we don't care any more */ fprintf(outFa->polyAFh, "%s\t%d\t%d\t%d\n", inFa->id, inFa->seqLen, maskTailPolyA(inFa->seq, inFa->seqLen), maskHeadPolyT(inFa->seq, inFa->seqLen)); } }
void queryResponse(int sd, struct bzp *bzp, struct blatzIndex *indexList) /* Respond to query message - read options and dna from socket, * and do alignment. */ { struct bzp lbzp = *bzp; struct dnaSeq *seq = NULL; char buf[256], *line, *word; char *out = NULL, *mafT = NULL, *mafQ = NULL; /* First get options - overriding what got set at startup. */ for (;;) { if ((line = netGetString(sd, buf)) == NULL) { truncatedQuery(1); return; } word = nextWord(&line); line = skipLeadingSpaces(line); if (sameString(word, "seq")) break; else if (sameString(word, "rna")) lbzp.rna = TRUE; else if (sameString(word, "minScore")) lbzp.minScore = atoi(line); else if (sameString(word, "minGapless")) lbzp.minGapless = atoi(line); else if (sameString(word, "multiHits")) lbzp.multiHits = atoi(line); else if (sameString(word, "minChain")) lbzp.minChain = atoi(line); else if (sameString(word, "maxExtend")) lbzp.maxExtend = atoi(line); else if (sameString(word, "maxBandGap")) lbzp.maxBandGap = atoi(line); else if (sameString(word, "minExpand")) lbzp.minExpand = atoi(line); else if (sameString(word, "expandWindow")) lbzp.expandWindow = atoi(line); else if (sameString(word, "out")) lbzp.out = out = cloneString(line); else if (sameString(word, "mafQ")) lbzp.mafQ = mafQ = cloneString(line); else if (sameString(word, "mafT")) lbzp.mafT = mafT = cloneString(line); } /* Get DNA into seq*/ { char *name = netGetString(sd, buf); char *dna; if (name == NULL) { truncatedQuery(2); return; } dna = netGetHugeString(sd); if (dna == NULL) { truncatedQuery(3); return; } AllocVar(seq); seq->dna = dna; seq->size = strlen(dna); seq->name = cloneString(name); bzpTime("Received %d bases in %s", seq->size, seq->name); if (lbzp.rna) maskTailPolyA(seq->dna, seq->size); } /* Create alignments into chainList and write results. 
*/ { FILE *f = netFileFromSocket(sd); struct chain *chainList = blatzAlign(&lbzp, indexList, seq); blatzWriteChains(&lbzp, &chainList, seq, 0, seq->size, seq->size, indexList, f); bzpTime("sent result - %d chains", slCount(chainList)); carefulClose(&f); } dnaSeqFree(&seq); freez(&out); freez(&mafQ); freez(&mafT); }
static void alignAll(struct bzp *bzp, struct blatzIndex *indexList, struct dnaLoad *queryDl, char *outFile) /* Make up neighorhood index for queryList, and use it to scan * targetList. Put output in outFile */ { FILE *f = mustOpen(outFile, "w"); struct dnaSeq *query; // LX BEG int b, bend, printing; FILE *bedfp = NULL; // See if bed file output of the mask was requested if (differentString(bzp->dynaBedFileQ, "")) bedfp = mustOpen(bzp->dynaBedFileQ, "w"); // Counts all the query-target hits encountered by the program inside the // loops of gapless.c dynaHits = 0; // Counts how many target and query positions reached the limit dynaCountTarget = 0; dynaCountQuery = 0; // This is the limit used by the program, currently just bzp->dynaLimit(QT) // but should be useful for scaling to sequence size targetHitDLimit = VERY_LARGE_NUMBER; // perhaps unnecessary default queryHitDLimit = VERY_LARGE_NUMBER; // perhaps unnecessary default // LX END while ((query = dnaLoadNext(queryDl)) != NULL) { double bestScore = 0; struct chain *chainList; // LX BEG if (bzp->dynaLimitQ<VERY_LARGE_NUMBER) { queryHitDLimit = bzp->dynaLimitQ; // allocate zeroed memory for hit counters AllocArray(dynaCountQ, query->size); } // LX END if (bzp->unmask || bzp->rna) toUpperN(query->dna, query->size); if (bzp->rna) maskTailPolyA(query->dna, query->size); chainList = blatzAlign(bzp, indexList, query); if (chainList != NULL) bestScore = chainList->score; else { if (seqIsLower(query)) warn("Sequence %s is all lower case, and thus ignored. 
Use -unmask " "flag to unmask lower case sequence.", query->name); } verbose(1, "%s (%d bases) score %2.0f\n", query->name, query->size, bestScore); blatzWriteChains(bzp, &chainList, query, dnaLoadCurStart(queryDl), dnaLoadCurEnd(queryDl), dnaLoadCurSize(queryDl), indexList, f); // LX BEG // This prints the contents of the mask into the .bed file opened above if (bedfp != NULL) { if (bzp->dynaLimitQ<VERY_LARGE_NUMBER) { printing = 0; for (b=0;b<query->size;b++) { if (dynaCountQ[b] > queryHitDLimit) { if (printing == 0) { printing = 1; fprintf(bedfp,"%s %d ",query->name,b); } } if (dynaCountQ[b] <= queryHitDLimit) { if (printing == 1) { printing = 0; bend = b-1; fprintf(bedfp,"%d\n",bend); } } } } else { fprintf(bedfp,"#No dynamic masking data to print.\n"); } } // LX END dnaSeqFree(&query); } // LX BEG // Statistics to print about how many hits were dropped (ignored) dynaDrops = dynaCountTarget + dynaCountQuery; dynaDropsPerc = (float)100*dynaDrops/dynaHits+0.5; verbose(2, "%d dynaDrops (%f%%) at T=%d Q=%d \n", dynaDrops, (double)dynaDropsPerc, targetHitDLimit, queryHitDLimit); // Free dynamic memory used for the sequence-length-dependent counter arrays freeMem(dynaCountQ); if (bedfp != NULL) carefulClose(&bedfp); freeMem(dynaWordCount); // LX END carefulClose(&f); }