void writePslFrags(struct psl *psl, FILE *f)
/* Look into psl and figure out if we want to write out all or part of it.
 * Every block of at least 30 bases is written to f as its own one-block
 * psl record via pslTabOut().  The parent's match, misMatch, repMatch and
 * nCount are apportioned to the fragment with
 * roundingScale(count, blockSize, totalSize); shorter blocks are skipped. */
{
const int minFragSize = 30;	/* Blocks shorter than this are not written. */
int i;
/* Denominator for the scaling below.  It has to hold the whole-alignment
 * total (presumably the sum of all block sizes - confirm against
 * pslBlockTotalSize); a zero here would make every roundingScale() call
 * scale by zero. */
int totalSize = pslBlockTotalSize(psl);

for (i=0; i<psl->blockCount; ++i)
    {
    int size = psl->blockSizes[i];
    if (size >= minFragSize)
	{
	/* Static so that every field not assigned below stays zero. */
	static struct psl p;
	/* One-element "arrays" backing the block lists of p. */
	unsigned blockSizes, qStarts, tStarts;

	p.match = roundingScale(psl->match, size, totalSize);
	p.misMatch = roundingScale(psl->misMatch, size, totalSize);
	p.repMatch = roundingScale(psl->repMatch, size, totalSize);
	p.nCount = roundingScale(psl->nCount, size, totalSize);
	p.strand[0] = psl->strand[0];
	p.strand[1] = psl->strand[1];
	p.qName = psl->qName;
	p.qSize = psl->qSize;
	p.tName = psl->tName;
	p.tSize = psl->tSize;
	p.blockCount = 1;
	p.blockSizes = &blockSizes;
	blockSizes = size;
	p.qStarts = &qStarts;
	qStarts = psl->qStarts[i];
	p.tStarts = &tStarts;
	tStarts = psl->tStarts[i];

	/* Block starts are stored relative to the strand given in
	 * strand[]; flip minus-strand blocks to get qStart/tStart. */
	if (p.strand[0] == '-')
	    p.qStart = psl->qSize - (qStarts + size);
	else
	    p.qStart = qStarts;
	if (p.strand[1] == '-')
	    p.tStart = psl->tSize - (tStarts + size);
	else
	    p.tStart = tStarts;
	p.qEnd = p.qStart + size;
	p.tEnd = p.tStart + size;
	pslTabOut(&p, f);
	}
    }
}
void pslMrnaCover(char *pslFile, char *faFile) /* pslMrnaCover - Make histogram of coverage percentage of mRNA in psl. */ { static int histogram[101]; int i; int qAli; struct hash *hash; struct rnaCover *rcList = NULL, *rc; struct lineFile *lf = pslFileOpen(pslFile); struct psl *psl; /* Build up list of all sequences. */ readFa(faFile, &rcList, &hash); /* Scan psls and see maximum amount each is aligned. */ while ((psl = pslNext(lf)) != NULL) { if (psl->qSize >= minSize) { if ((rc = hashFindVal(hash, psl->qName)) == NULL) errAbort("%s is in %s but not %s", psl->qName, pslFile, faFile); if (rc->qSize != psl->qSize) errAbort("%s is %d bytes in %s but %d in %s", psl->qName, rc->qSize, faFile, psl->qSize, pslFile); qAli = psl->match + psl->repMatch + psl->misMatch; if (qAli > rc->qMaxAli) rc->qMaxAli = qAli; } pslFree(&psl); } lineFileClose(&lf); /* Open file to keep track of non-aligners */ if (listZero != NULL) { FILE *f = mustOpen(listZero, "w"); for (rc = rcList; rc != NULL; rc = rc->next) { if (rc->qMaxAli == 0) fprintf(f, "%s\t%d\n", rc->name, rc->qSize); } } /* Talley up percentage aligning in histogram. */ for (rc = rcList; rc != NULL; rc = rc->next) { int histIx = roundingScale(100, rc->qMaxAli, rc->qSize); assert(histIx <= 100); histogram[histIx] += 1; } /* Print out histogram. */ for (i=0; i<=100; ++i) { printf("%3d%% %6d\n", i, histogram[i]); } }
boolean detailTest(struct psl *psl) /* Detailed pass/fail test. */ { int size = pslBlockTotalSize(psl); int badFactor = psl->misMatch + psl->tNumInsert + psl->qNumInsert + 2*log(1+psl->tBaseInsert + psl->qBaseInsert); int milliBad = roundingScale(1000, badFactor, size); if (sameString(psl->qName, "ti|18649044")) { static int maxc = 10; uglyf("%s: size %d, badFactor %d, milliBad %d\n", psl->qName, size, badFactor, milliBad); if (--maxc == 0) uglyAbort("All for now"); } #ifdef NEVER #endif /* NEVER */ if (milliBad < 85) return FALSE; return TRUE; }
void aliTrack(char *bacAcc, char *wholeName, char *partsName, struct memGfx *mg, int x, int y, FILE *mapFile, int trim, char *repeatMask) /* Write out one alignment track. */ { struct dnaSeq *whole, *partList, *part; bits16 contig; int maxBlockSize = 5000; int wholeSize; struct patSpace *ps; DNA *wholeDna; whole = faReadAllDna(wholeName); if (slCount(whole) > 1) warn("%d sequences in %s, only using first", slCount(whole), wholeName); wholeDna = whole->dna; wholeSize = whole->size; ps = makePatSpace(&whole, 1, oocFile, 5, 500); partList = faReadAllDna(partsName); printf("%d contigs in %s\n\n", slCount(partList), partsName); for (part = partList, contig = 0; part != NULL; part = part->next, ++contig) { DNA *dna = part->dna; int dnaSize = part->size; int start, size; int subIx = 0; char numText[12]; Color color = blockColors[contig%ArraySize(blockColors)]; sprintf(numText, "%d", contig+1); for (start = trim; start < dnaSize-trim; start += size) { struct ffAli *left, *right; boolean rc; int score; size = dnaSize - start-trim; if (size > maxBlockSize) size = maxBlockSize; if (!fastFind(dna+start, size, ps, &left, &rc, &score) ) { printf("Contig %d.%d:%d-%d of %d UNALIGNED\n", contig+1, subIx, start, start+size, dnaSize); } else { int x1, x2; int xo, w; double quality; int qStart, qSize, tStart,tSize; char qualityString[40]; right = left; while (right->right != NULL) right = right->right; qStart = left->nStart - dna; qSize = right->nEnd - left->nStart; if (rc) { int rcEnd = right->nEnd - (dna+start) - 1; qStart = reverseOffset(rcEnd, size) + start; } tStart = left->hStart - wholeDna; tSize = right->hEnd - left->hStart; quality = 100.0 * score / qSize; if (quality >= 25.0) sprintf(qualityString, "%4.1f%%", quality); else sprintf(qualityString, "<50%%"); printf("<A HREF=\"../cgi-bin/chkGlue.exe?bacAcc=%s&contig=%d&qStart=%d&qSize=%d&tStart=%d&tSize=%d&repeatMask=%s\">", bacAcc, contig, qStart, qSize, tStart, tSize, repeatMask); printf("Contig %d.%d:%d-%d %c of %d aligned 
%d-%d of %d aliSize %d quality %s</A>\n", contig+1, subIx, qStart, qStart+qSize, (rc ? '-' : '+'), dnaSize, tStart, tStart + tSize, wholeSize, qSize, qualityString); x1 = roundingScale(trackWidth, left->hStart - wholeDna, wholeSize); x2 = roundingScale(trackWidth, right->hEnd - wholeDna, wholeSize); xo = x1+x; w = x2-x1; mapWriteBox(mapFile, mtBlock, xo, y, w, trackHeight, bacAcc, contig, qStart, qSize, tStart, tSize); mgDrawBox(mg, xo, y, w, trackHeight, color); mgTextCentered(mg, xo, y, w, trackHeight, MG_WHITE, font, numText); ffFreeAli(&left); } ++subIx; } } freePatSpace(&ps); freeAllSeq(&whole); freeAllSeq(&partList); }
static void savePslx(char *chromName, int chromSize, int chromOffset, struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq, boolean isRc, enum ffStringency stringency, int minMatch, FILE *f, struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc, struct hash *maskHash, int minIdentity, boolean qIsProt, boolean tIsProt, boolean saveSeq)
/* Analyse one alignment and if it looks good enough write it out to file in
 * psl format (or pslX format - if saveSeq is TRUE).
 * "Good enough" means the identity score, in parts per thousand and
 * penalised by two per gap, is at least minIdentity.  When maskHash is
 * given, matches at positions whose bit is set in the mask for tSeq are
 * counted as repMatch.  When t3Hash is given, target positions are
 * converted with trans3GenoPos().  chromOffset is added to all target
 * coordinates.  minMatch, qIsProt and tIsProt are not used here.
 * Aborts on a write error. */
{
/* Adapted from psLayout and extended to cope with protein as well as DNA
 * alignments. */
struct ffAli *block, *nextBlock;
struct ffAli *lastBlock = ffRightmost(ali);
DNA *needle = qSeq->dna;
DNA *hay = tSeq->dna;
int qStart = ali->nStart - needle;
int qEnd = lastBlock->nEnd - needle;
int tStart, tEnd;
int qInsertBases = 0, qInsertCount = 0;
int tInsertBases = 0, tInsertCount = 0;
int matchCount = 0, mismatchCount = 0, repMatch = 0, countNs = 0;
int gaps, id;
struct trans3 *t3List = NULL;
Bits *maskBits = NULL;

if (maskHash != NULL)
    maskBits = hashMustFindVal(maskHash, tSeq->name);
if (t3Hash != NULL)
    t3List = hashMustFindVal(t3Hash, tSeq->name);
tStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
tEnd = trans3GenoPos(lastBlock->hEnd, tSeq, t3List, TRUE) + chromOffset;

/* Count up matches, mismatches, inserts, etc. */
for (block = ali; block != NULL; block = nextBlock)
    {
    DNA *qBases = block->nStart;
    DNA *tBases = block->hStart;
    int blockSize = block->nEnd - block->nStart;
    int i;

    nextBlock = block->right;
    for (i=0; i<blockSize; ++i)
	{
	DNA q = qBases[i];
	DNA t = tBases[i];

	if (q == 'n' || t == 'n')
	    ++countNs;
	else if (q != t)
	    ++mismatchCount;
	else if (maskBits == NULL)
	    ++matchCount;
	else
	    {
	    /* Masked target positions count as repeat matches. */
	    int seqOff = tBases + i - hay;
	    if (bitReadOne(maskBits, seqOff))
		++repMatch;
	    else
		++matchCount;
	    }
	}

    /* Tally the gap between this block and the next one, if any. */
    if (nextBlock != NULL)
	{
	int nextStart = trans3GenoPos(nextBlock->hStart, tSeq, t3List, FALSE) + chromOffset;
	int thisEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE) + chromOffset;
	int tGap = nextStart - thisEnd;
	int qGap = nextBlock->nStart - block->nEnd;

	if (qGap != 0)
	    {
	    ++qInsertCount;
	    qInsertBases += qGap;
	    }
	if (tGap != 0)
	    {
	    ++tInsertCount;
	    tInsertBases += tGap;
	    }
	}
    }

/* See if it looks good enough to output.  (The score >= minMatch test is
 * done at a higher level.)  Target gaps are not penalised at ffCdna
 * stringency. */
gaps = qInsertCount;
if (stringency != ffCdna)
    gaps += tInsertCount;
id = roundingScale(1000, matchCount + repMatch - 2*gaps,
	matchCount + repMatch + mismatchCount);
if (id < minIdentity)
    return;

/* Flip coordinates of reverse-complemented sequences. */
if (isRc)
    {
    int oldStart = qStart;
    int oSize = qSeq->size;
    qStart = oSize - qEnd;
    qEnd = oSize - oldStart;
    }
if (targetIsRc)
    {
    int oldStart = tStart;
    tStart = chromSize - tEnd;
    tEnd = chromSize - oldStart;
    }

/* Counts and strand. */
fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
    matchCount, mismatchCount, repMatch, countNs,
    qInsertCount, qInsertBases, tInsertCount, tInsertBases,
    (isRc ? '-' : '+'));
if (reportTargetStrand)
    fprintf(f, "%c", (targetIsRc ? '-' : '+') );

/* Names, sizes, ranges and block count. */
fprintf(f, "\t%s\t%d\t%d\t%d\t"
	   "%s\t%d\t%d\t%d\t%d\t",
    qSeq->name, qSeq->size, qStart, qEnd,
    chromName, chromSize, tStart, tEnd,
    ffAliCount(ali));

/* Comma-terminated lists: block sizes, query starts, target starts. */
for (block = ali; block != NULL; block = block->right)
    fprintf(f, "%ld,", (long)(block->nEnd - block->nStart));
fprintf(f, "\t");
for (block = ali; block != NULL; block = block->right)
    fprintf(f, "%ld,", (long)(block->nStart - needle));
fprintf(f, "\t");
for (block = ali; block != NULL; block = block->right)
    fprintf(f, "%d,", trans3GenoPos(block->hStart, tSeq, t3List, FALSE) + chromOffset);

/* pslX extension: the query and target sequence of every block. */
if (saveSeq)
    {
    fputc('\t', f);
    for (block = ali; block != NULL; block = block->right)
	{
	mustWrite(f, block->nStart, block->nEnd - block->nStart);
	fputc(',', f);
	}
    fputc('\t', f);
    for (block = ali; block != NULL; block = block->right)
	{
	mustWrite(f, block->hStart, block->hEnd - block->hStart);
	fputc(',', f);
	}
    }
fprintf(f, "\n");

if (ferror(f))
    {
    perror("");
    errAbort("Write error to .psl");
    }
}