Esempio n. 1
0
void faPolyASizes(char *inFile, char *outFile)
/* faPolyASizes - get poly-A tail sizes*/
{
DNA *seq;
int size;
char *name;
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");

while (faSomeSpeedReadNext(lf, &seq, &size, &name, FALSE))
    fprintf(f, "%s\t%d\t%d\t%d\n", name, size,
            maskTailPolyA(seq, size), maskHeadPolyT(seq, size));
}
Esempio n. 2
0
void polyTrimSeq(struct dnaSeq *seq, FILE *fh)
/*  trim a sequence */
{
if (trimPolyA)
    {
    int sz = maskTailPolyA(seq->dna, seq->size);
    seq->size -= sz;
    seq->dna[seq->size] = '\0';
    }
if (trimPolyT)
    {
    int sz = maskHeadPolyT(seq->dna, seq->size);
    seq->size -= sz;
    seq->dna += sz;
    }

faWriteNext(fh, seq->name, seq->dna, seq->size);
}
void trimSeq(struct dnaSeq *seq, struct dnaSeq *trimmed)
/* Copy seq to trimmed (shallow copy) and optionally trim
 * off polyA tail or polyT head. */
{
DNA *dna = seq->dna;
int size = seq->size;
*trimmed = *seq;
if (trimT)
    maskHeadPolyT(dna, size);
if (trimA || trimHardA)
    {
    int trimSize = maskTailPolyA(dna, size);
    if (trimHardA)
	{
	trimmed->size -= trimSize;
	dna[size-trimSize] = 0;
	}
    }
}
void outFaWrite(struct outFa* outFa, struct gbFa* inFa)
/* write a record to the output fasta, open or switch to a new file
 * if needed. */
{
if ((maxFaSize > 0) && (outFa->fa != NULL) && (outFa->fa->off > maxFaSize))
    outFaClose(outFa);
if (outFa->fa == NULL)
    outFaOpen(outFa);
gbFaWriteFromFa(outFa->fa, inFa, NULL);
outFa->numSeqs++;
outFa->numBases += inFa->seqLen;

if (outFa->polyAFh != NULL)
    {
    /* note, this modifies the fasta sequence, but we don't care any more */
    fprintf(outFa->polyAFh, "%s\t%d\t%d\t%d\n", inFa->id, inFa->seqLen,
            maskTailPolyA(inFa->seq, inFa->seqLen),
            maskHeadPolyT(inFa->seq, inFa->seqLen));
    }
}
Esempio n. 5
0
void queryResponse(int sd, struct bzp *bzp, struct blatzIndex *indexList)
/* Respond to query message - read options and dna from socket,
 * and do alignment. */
{
struct bzp lbzp = *bzp;
struct dnaSeq *seq = NULL;
char buf[256], *line, *word;
char *out = NULL, *mafT = NULL, *mafQ = NULL;

/* First get options - overriding what got set at startup. */
for (;;)
    {
    if ((line = netGetString(sd, buf)) == NULL)
         {
         truncatedQuery(1);
         return;
         }
    word = nextWord(&line);
    line = skipLeadingSpaces(line);
    if (sameString(word, "seq"))
        break;
    else if (sameString(word, "rna"))
       lbzp.rna = TRUE;
    else if (sameString(word, "minScore"))
       lbzp.minScore = atoi(line);
    else if (sameString(word, "minGapless"))
       lbzp.minGapless = atoi(line);
    else if (sameString(word, "multiHits"))
       lbzp.multiHits = atoi(line);
    else if (sameString(word, "minChain"))
       lbzp.minChain = atoi(line);
    else if (sameString(word, "maxExtend"))
       lbzp.maxExtend = atoi(line);
    else if (sameString(word, "maxBandGap"))
       lbzp.maxBandGap = atoi(line);
    else if (sameString(word, "minExpand"))
       lbzp.minExpand = atoi(line);
    else if (sameString(word, "expandWindow"))
       lbzp.expandWindow = atoi(line);
    else if (sameString(word, "out"))
       lbzp.out = out = cloneString(line);
    else if (sameString(word, "mafQ"))
       lbzp.mafQ = mafQ = cloneString(line);
    else if (sameString(word, "mafT"))
       lbzp.mafT = mafT = cloneString(line);
    }

/* Get DNA into seq*/
    {
    char *name = netGetString(sd, buf);
    char *dna;
    if (name == NULL)
        {
        truncatedQuery(2);
        return;
        }
    dna = netGetHugeString(sd);
    if (dna == NULL)
        {
        truncatedQuery(3);
        return;
        }
    AllocVar(seq);
    seq->dna = dna;
    seq->size = strlen(dna);
    seq->name = cloneString(name);
    bzpTime("Received %d bases in %s", seq->size, seq->name);
    if (lbzp.rna)
        maskTailPolyA(seq->dna, seq->size);
    }

/* Create alignments into chainList and write results. */
    {
    FILE *f = netFileFromSocket(sd);
    struct chain *chainList = blatzAlign(&lbzp, indexList, seq);
    blatzWriteChains(&lbzp, &chainList, seq, 0, seq->size, seq->size, indexList, f);
    bzpTime("sent result - %d chains", slCount(chainList));
    carefulClose(&f);
    }

dnaSeqFree(&seq);
freez(&out);
freez(&mafQ);
freez(&mafT);
}
static void alignAll(struct bzp *bzp, struct blatzIndex *indexList, 
        struct dnaLoad *queryDl, char *outFile)
/* Make up neighorhood index for queryList, and use it to scan
 * targetList.  Put output in outFile */
{
FILE *f = mustOpen(outFile, "w");
struct dnaSeq *query;

// LX BEG
int b, bend, printing; 
FILE *bedfp = NULL;
// See if bed file output of the mask was requested
if (differentString(bzp->dynaBedFileQ, "")) 
   bedfp = mustOpen(bzp->dynaBedFileQ, "w");
// Counts all the query-target hits encountered by the program inside the 
// loops of gapless.c
dynaHits = 0;
// Counts how many target and query positions reached the limit
dynaCountTarget = 0;
dynaCountQuery = 0;
// This is the limit used by the program, currently just bzp->dynaLimit(QT)
// but should be useful for scaling to sequence size
targetHitDLimit = VERY_LARGE_NUMBER; // perhaps unnecessary default
queryHitDLimit = VERY_LARGE_NUMBER; // perhaps unnecessary default
// LX END

while ((query = dnaLoadNext(queryDl)) != NULL)
    {
    double bestScore = 0;
    struct chain *chainList;
    // LX BEG
    if (bzp->dynaLimitQ<VERY_LARGE_NUMBER)
       {
       queryHitDLimit = bzp->dynaLimitQ;
       // allocate zeroed memory for hit counters
       AllocArray(dynaCountQ, query->size);
       }
    // LX END
    if (bzp->unmask || bzp->rna)
        toUpperN(query->dna, query->size);
    if (bzp->rna)
        maskTailPolyA(query->dna, query->size);
    chainList = blatzAlign(bzp, indexList, query);
    if (chainList != NULL) 
    	bestScore = chainList->score;
    else
        {
	if (seqIsLower(query))
	    warn("Sequence %s is all lower case, and thus ignored. Use -unmask "
	         "flag to unmask lower case sequence.", query->name);
	}
    verbose(1, "%s (%d bases) score %2.0f\n", 
            query->name, query->size, bestScore);
    blatzWriteChains(bzp, &chainList, query, 
    	dnaLoadCurStart(queryDl), dnaLoadCurEnd(queryDl),
	dnaLoadCurSize(queryDl), indexList, f);
    // LX BEG
    // This prints the contents of the mask into the .bed file opened above
    if (bedfp != NULL)
       {
       if (bzp->dynaLimitQ<VERY_LARGE_NUMBER)
          {
          printing = 0;
          for (b=0;b<query->size;b++)
              {
              if (dynaCountQ[b] > queryHitDLimit)
                 {
                 if (printing == 0)
                    {
                    printing = 1;
                    fprintf(bedfp,"%s %d ",query->name,b);
                    }
                 }
              if (dynaCountQ[b] <= queryHitDLimit)
                 {
                 if (printing == 1)
                    {
                    printing = 0;
                    bend = b-1;
                    fprintf(bedfp,"%d\n",bend);
                    }
                 }
              }
           }
        else
           {
           fprintf(bedfp,"#No dynamic masking data to print.\n");
           }
        }
    // LX END
    dnaSeqFree(&query);
    }
    // LX BEG
    // Statistics to print about how many hits were dropped (ignored)
    dynaDrops = dynaCountTarget + dynaCountQuery;
    dynaDropsPerc = (float)100*dynaDrops/dynaHits+0.5;
    verbose(2, "%d dynaDrops (%f%%) at T=%d Q=%d \n", 
    	dynaDrops, (double)dynaDropsPerc, targetHitDLimit, queryHitDLimit);
   // Free dynamic memory used for the sequence-length-dependent counter arrays
   freeMem(dynaCountQ);
   if (bedfp != NULL)
      carefulClose(&bedfp);
   freeMem(dynaWordCount);
   // LX END
carefulClose(&f);
}