void axtAndBed(char *inAxt, char *inBed, char *outAxt) /* axtAndBed - Intersect an axt with a bed file and output axt.. */ { struct hash *tHash = readBed(inBed); /* target keyed, binKeeper value */ struct lineFile *lf = lineFileOpen(inAxt, TRUE); struct axt *axt; struct binElement *list = NULL, *el; FILE *f = mustOpen(outAxt, "w"); struct axtScoreScheme *ss = axtScoreSchemeDefault(); while ((axt = axtRead(lf)) != NULL) { struct chromInfo *ci = hashFindVal(tHash, axt->tName); if (ci != NULL) { list = binKeeperFind(ci->bk, axt->tStart, axt->tEnd); if (list != NULL) { /* Flatten out any overlapping elements by projecting them * onto a 0/1 valued character array and then looking for * runs of 1 in this array. */ int tStart = axt->tStart; int tEnd = axt->tEnd; int tSize = tEnd - tStart; int i, s = 0; char c, lastC = 0; char *merger = NULL; AllocArray(merger, tSize+1); for (el = list; el != NULL; el = el->next) { int s = el->start - tStart; int e = el->end - tStart; int sz; if (s < 0) s = 0; if (e > tSize) e = tSize; sz = e - s; if (sz > 0) memset(merger + s, 1, sz); } for (i=0; i<=tSize; ++i) { c = merger[i]; if (c && !lastC) { s = i; lastC = c; } else if (!c && lastC) { axtSubsetOnT(axt, s+tStart, i+tStart, ss, f); lastC = c; } } freez(&merger); slFreeList(&list); } } axtFree(&axt); } }
struct seqPair *readAxtBlocks(char *fileName, struct hash *pairHash, FILE *f) /* Read in axt file and parse blocks into pairHash */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dyString *dy = newDyString(512); struct axt *axt; struct seqPair *spList = NULL, *sp; lineFileSetMetaDataOutput(lf, f); lineFileSetUniqueMetaData(lf); while ((axt = axtRead(lf)) != NULL) { dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", axt->qName, axt->qStrand, axt->tName); sp = hashFindVal(pairHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(pairHash, dy->string, sp, &sp->name); sp->qName = cloneString(axt->qName); sp->tName = cloneString(axt->tName); sp->qStrand = axt->qStrand; } axtAddBlocksToBoxInList(&sp->blockList, axt); sp->axtCount += 1; axtFree(&axt); } lineFileClose(&lf); dyStringFree(&dy); slSort(&spList, seqPairCmp); return spList; }
void axtToPsl(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* axtToPsl - Convert axt to psl format. */ { struct hash *tSizeHash = readSizes(tSizeFile); struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); char strand[2]; FILE *f = mustOpen(outName, "w"); struct psl* psl; struct axt *axt; strand[1] = '\0'; while ((axt = axtRead(lf)) != NULL) { int qSize = findSize(qSizeHash, axt->qName); int qStart = axt->qStart; int qEnd = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qStart, &qEnd, qSize); strand[0] = axt->qStrand; psl = pslFromAlign(axt->qName, qSize, qStart, qEnd, axt->qSym, axt->tName, findSize(tSizeHash, axt->tName), axt->tStart, axt->tEnd, axt->tSym, strand, PSL_IS_SOFTMASK); if (psl != NULL) { pslTabOut(psl, f); pslFree(&psl); } axtFree(&axt); } lineFileClose(&lf); carefulClose(&f); }
void predict(struct c1Counts cKozak[10], struct c1Counts *cAll, char *axtFile, char *outFile, struct hash *rsiHash) /* Predict location of initial ATG */ { struct lineFile *lf = lineFileOpen(axtFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct oddsMatrix kozak[10]; int i; int bestPos, firstPos, actualPos; double bestScore, firstScore, actualScore; struct axt *axt; for (i=0; i<10; ++i) countToOdds(&cKozak[i], cAll, &kozak[i]); while ((axt = axtRead(lf)) != NULL) { struct refSeqInfo *rsi = hashFindVal(rsiHash, axt->tName); if (rsi != NULL && rsi->cdsStart >= 5) { findBestHit(axt, kozak, 10, &bestPos, &bestScore, &firstPos, &firstScore); actualPos = tIxToSymIx(axt, rsi->cdsStart - 5); actualScore = scoreMotif(kozak, 10, axt->tSym+actualPos, axt->qSym + actualPos); /* Score motif at position. */ fprintf(f, "%s\t%d\t%f\t%d\t%f\t%d\t%f\n", axt->tName, rsi->cdsStart, actualScore, tIxFromSymIx(axt, bestPos) + 5, bestScore, tIxFromSymIx(axt, firstPos) + 5, firstScore); } axtFree(&axt); } carefulClose(&f); lineFileClose(&lf); }
void twinOrfStats(char *axtFile, char *raFile, char *outFile) /* twinOrfStats - Collect stats on refSeq cDNAs aligned to another species via axtForEst. */ { struct hash *rsiHash = readRefRa(raFile); struct lineFile *lf = lineFileOpen(axtFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct axt *axt; static struct countMatrix kozak[10], all, utr5, utr3, cds; static struct c2Counts c2All, c2Utr5, c2Utr3, c2Cds; char label[64]; char *predictFile = optionVal("predict", NULL); int i; struct codonCounts codons; initCounts(&codons, 1); threshold = optionFloat("threshold", threshold); while ((axt = axtRead(lf)) != NULL) { struct refSeqInfo *rsi = hashFindVal(rsiHash, axt->tName); if (rsi != NULL && rsi->cdsStart >= 5) { if (checkAtg(axt, rsi->cdsStart)) { for (i=0; i<10; ++i) addPos(&kozak[i], axt, rsi->cdsStart - 5 + i); addRange(&all, &c2All, axt, 0, rsi->size); addRange(&utr5, &c2Utr5, axt, 0, rsi->cdsStart); addRange(&cds, &c2Cds, axt, rsi->cdsStart, rsi->cdsEnd); addRange(&utr3, &c2Utr3, axt, rsi->cdsEnd, rsi->size); addCodons(&codons, axt, rsi->cdsStart, rsi->cdsEnd-3); } } axtFree(&axt); } lineFileClose(&lf); dumpCounts(f, &all, "all"); dumpCounts(f, &utr5, "utr5"); dumpCounts(f, &cds, "cds"); dumpCounts(f, &utr3, "utr3"); dumpM1(f, &c2All, "c2_all"); dumpM1(f, &c2Utr5, "c2_utr5"); dumpM1(f, &c2Cds, "c2_cds"); dumpM1(f, &c2Utr3, "c2_utr3"); for (i=0; i<10; ++i) { sprintf(label, "kozak[%d]", i-5); dumpCounts(f, &kozak[i], label); } dumpCodon(f, &codons, "codon"); if (predictFile) { predict(kozak, &all, axtFile, predictFile, rsiHash); } }
void writeMousePartsAsMaf(FILE *f, struct hash *mouseHash, char *ratMouseDir, char *mouseChrom, int mouseStart, int mouseEnd, int mouseChromSize, struct hash *rSizeHash, struct hash *dupeHash) /* Write out mouse/rat alignments that intersect given region of mouse. * This gets a little involved because we need to do random access on * the mouse/rat alignment files, which are too big to fit into memory. * On disk we have a mouse/rat alignment file for each mouse chromosome, * and an index of it. When we first access a mouse chromosome we load * the index for that chromosome into memory, and open the alignment file. * We then do a seek and read to load a particular alignment. */ { struct mouseChromCache *mcc = NULL; struct binElement *list = NULL, *el; char aliName[512]; /* Get cache for this mouse chromosome */ mcc = hashFindVal(mouseHash, mouseChrom); if (mcc == NULL) { mcc = newMouseChromCache(mouseChrom, mouseChromSize, ratMouseDir); hashAdd(mouseHash, mouseChrom, mcc); } if (mcc->lf == NULL) return; /* Get list of positions and process one axt into a maf for each */ list = binKeeperFindSorted(mcc->bk, mouseStart, mouseEnd); for (el = list; el != NULL; el = el->next) { struct axt *axt; struct mafAli temp; long long *pPos, pos; pPos = el->val; pos = *pPos; sprintf(aliName, "%s.%lld", mouseChrom, pos); if (!hashLookup(dupeHash, aliName)) { int rChromSize; hashAdd(dupeHash, aliName, NULL); lineFileSeek(mcc->lf, pos, SEEK_SET); axt = axtRead(mcc->lf); rChromSize = hashIntVal(rSizeHash, axt->qName); prefixAxt(axt, rPrefix, mPrefix); mafFromAxtTemp(axt, mouseChromSize, rChromSize, &temp); mafWriteGood(f, &temp); axtFree(&axt); } } slFreeList(&list); }
void axtPretty(char *inName, char *outName)
/* axtPretty - Convert axt to more human readable format.
 *   inName  - axt file to read
 *   outName - file that receives the axtOutPretty() rendering
 * The line width is taken from the "line" option (default 70). */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct axt *axt;
int lineSize = optionInt("line", 70);

while ((axt = axtRead(lf)) != NULL)
    {
    axtOutPretty(axt, lineSize, f);
    axtFree(&axt);
    }
/* Both handles were previously left open on return. */
lineFileClose(&lf);
carefulClose(&f);
}
void axtSwapFile(char *source, char *targetSizes, char *querySizes, char *dest) /* axtSwapFile - Swap source and query in an axt file. */ { struct hash *tHash = loadIntHash(targetSizes); struct hash *qHash = loadIntHash(querySizes); struct lineFile *lf = lineFileOpen(source, TRUE); FILE *f = mustOpen(dest, "w"); struct axt *axt; while ((axt = axtRead(lf)) != NULL) { axtSwap(axt, hashIntVal(tHash, axt->tName), hashIntVal(qHash, axt->qName)); axtWrite(axt, f); axtFree(&axt); } }
void axtDropSelf(char *inFile, char *outFile)
/* axtDropSelf - Drop alignments that just align same thing to itself.
 * An alignment is dropped only when query and target agree in name, strand,
 * start and end; every other alignment is copied from inFile to outFile. */
{
FILE *f = mustOpen(outFile, "w");
struct lineFile *lf = lineFileOpen(inFile, TRUE);
struct axt *axt;

while ((axt = axtRead(lf)) != NULL)
    {
    if (axt->qStart != axt->tStart || axt->qEnd != axt->tEnd
        || axt->qStrand != axt->tStrand || !sameString(axt->qName, axt->tName))
        {
        axtWrite(axt,f);
        }
    axtFree(&axt);
    }
/* Both handles were previously left open on return. */
lineFileClose(&lf);
carefulClose(&f);
}
void axtSplitByTarget(char *inName, char *outDir) /* axtSplitByTarget - Split a single axt file into one file per target. */ { struct hash *outHash = newHash(8); /* FILE valued hash */ struct lineFile *lf = lineFileOpen(inName, TRUE); struct axt *axt; makeDir(outDir); while ((axt = axtRead(lf)) != NULL) { FILE *f = getSplitFile(outHash, outDir, axt->tName, axt->tStart); axtWrite(axt, f); totalWritten += strlen(axt->tName) + strlen(axt->qName) + 40 + strlen(axt->qSym)+ strlen(axt->tSym); axtFree(&axt); } }
void subsetAxt(char *inName, char *outName, char *scoreFile, int threshold)
/* subsetAxt - Rescore alignments and output those over threshold.
 *   inName    - axt file to read
 *   outName   - output file handed to subsetOne() for each alignment
 *   scoreFile - scoring scheme read with axtScoreSchemeRead()
 *   threshold - must be positive, otherwise the function aborts */
{
struct axtScoreScheme *ss;
struct lineFile *lf;
FILE *f;
struct axt *axt;

/* Validate first so that a bad threshold aborts before outName is opened
 * for writing. */
if (threshold <= 0)
    errAbort("Threshold must be a positive number");

ss = axtScoreSchemeRead(scoreFile);
lf = lineFileOpen(inName, TRUE);
f = mustOpen(outName, "w");
while ((axt = axtRead(lf)) != NULL)
    {
    subsetOne(axt, ss, threshold, f);
    axtFree(&axt);
    }
/* Both handles were previously left open on return. */
lineFileClose(&lf);
carefulClose(&f);
}
struct binKeeper *loadAxtsIntoRange(char *fileName, char *tPrefix, char *qPrefix)
/* Read in an axt file and shove it into a bin-keeper.
 * Every alignment is stored under its target range in a binKeeper spanning
 * 0..maxChromSize; the alignments are not freed here because the binKeeper
 * keeps the pointers.  tPrefix and qPrefix are not used by this function. */
{
struct lineFile *in = lineFileOpen(fileName, TRUE);
struct binKeeper *keeper = binKeeperNew(0, maxChromSize);
int loaded = 0;

for (;;)
    {
    struct axt *ali = axtRead(in);
    if (ali == NULL)
        break;
    binKeeperAdd(keeper, ali->tStart, ali->tEnd, ali);
    loaded += 1;
    }
uglyf("LOaded %d from %s\n", loaded, fileName);
lineFileClose(&in);
return keeper;
}
void axtIndex(char *in, char *out)
/* axtIndex - Create summary file for axt.
 * For every alignment in 'in' one line is written to 'out':
 *     <tStart> <tEnd - tStart> <file offset at which the alignment was read>
 * The offset is sampled with lineFileTell() immediately before each read. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *axt;

for (;;)
    {
    off_t pos = lineFileTell(lf);
    axt = axtRead(lf);
    if (axt == NULL)
        break;
    /* %lld requires a signed long long argument; the previous cast to
     * unsigned long long did not match the conversion specifier. */
    fprintf(f, "%d %d %lld\n", axt->tStart, axt->tEnd - axt->tStart, (long long) pos);
    axtFree(&axt);
    }
carefulClose(&f);
/* The input was previously left open on return. */
lineFileClose(&lf);
}
void newStitch3(char *axtFile, char *output) /* newStitch3 - Another stitching experiment - with kd-trees.. */ { struct hash *pairHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct dyString *dy = newDyString(512); struct lineFile *lf = lineFileOpen(axtFile, TRUE); struct axt *axt; struct seqPair *spList = NULL, *sp; FILE *f = mustOpen(output, "w"); /* Read input file and divide alignments into various parts. */ while ((axt = axtRead(lf)) != NULL) { struct cBlock *block; if (axt->score < 500) { axtFree(&axt); continue; } dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", axt->qName, axt->qStrand, axt->tName); sp = hashFindVal(pairHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(pairHash, dy->string, sp, &sp->name); } AllocVar(block); block->qStart = axt->qStart; block->qEnd = axt->qEnd; block->tStart = axt->tStart; block->tEnd = axt->tEnd; block->score = axt->score; slAddHead(&sp->blockList, block); axtFree(&axt); } for (sp = spList; sp != NULL; sp = sp->next) { slReverse(&sp->blockList); chainPair(sp, f); } dyStringFree(&dy); }
void axtRescore(char *in, char *out)
/* axtRescore - Recalculate scores in axt.
 * The output starts with scoreScheme as written by axtScoreSchemeDnaWrite();
 * every alignment from 'in' then follows with its score replaced by
 * axtScore(axt, scoreScheme).  'out' is also registered as the metadata
 * output of the input file. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *axt;

lineFileSetMetaDataOutput(lf, f);
axtScoreSchemeDnaWrite(scoreScheme, f, "axtRescore");
for (;;)
    {
    axt = axtRead(lf);
    if (axt == NULL)
        break;
    axt->score = axtScore(axt, scoreScheme);
    axtWrite(axt, f);
    axtFree(&axt);
    }
/* Both handles were previously left open on return. */
lineFileClose(&lf);
carefulClose(&f);
}
void axtDropOverlap(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* used for cleaning up self alignments - deletes all overlapping self alignments */ { struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct axt *axt; int totMatch = 0; int totSkip = 0; int totLines = 0; while ((axt = axtRead(lf)) != NULL) { totLines++; totMatch += axt->score; if (sameString(axt->qName, axt->tName)) { int qs = axt->qStart; int qe = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qs, &qe, findSize(qSizeHash, axt->qName)); if (axt->tStart == qs && axt->tEnd == qe) { /* printf( "skip %c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\n", axt->qStrand, axt->qName, axt->symCount, axt->qStart, axt->qEnd, axt->tName, axt->symCount, axt->tStart, axt->tEnd ); */ totSkip++; continue; } } axtWrite(axt, f); axtFree(&axt); } fclose(f); lineFileClose(&lf); }
void setAliBits(char *axtBestDir, char *chrom, int chromSize, Bits *aliBits, Bits *matchBits)
/* Set bits where there are alignments and matches.
 * Reads <axtBestDir>/<chrom>.axt and passes every alignment to axtSetBits().
 * If the file cannot be opened a warning is issued and the bitmaps are left
 * untouched. */
{
char axtFileName[512];
struct axt *axt;
struct lineFile *lf;

/* Bounded formatting: directory and chromosome name come from the caller,
 * and the previous sprintf could write past the end of the buffer. */
safef(axtFileName, sizeof(axtFileName), "%s/%s.axt", axtBestDir, chrom);
if ((lf = lineFileMayOpen(axtFileName, TRUE)) == NULL)
    {
    warn("Couldn't open %s", axtFileName);
    return;
    }
while ((axt = axtRead(lf)) != NULL)
    {
    axtSetBits(axt, chromSize, aliBits, matchBits);
    axtFree(&axt);
    }
lineFileClose(&lf);
}
struct mafAli *axtLoadAsMafInRegion(struct sqlConnection *conn, char *table, char *chrom, int start, int end, char *tPrefix, char *qPrefix, int tSize, struct hash *qSizeHash) /* Return list of alignments in region from axt external file as a maf. */ { char **row; unsigned int extFileId = 0; struct lineFile *lf = NULL; struct mafAli *maf, *mafList = NULL; struct axt *axt; int rowOffset; struct sqlResult *sr = hRangeQuery(conn, table, chrom, start, end, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct scoredRef ref; scoredRefStaticLoad(row + rowOffset, &ref); if (ref.extFile != extFileId) { char *path = hExtFileName(sqlGetDatabase(conn),"extFile", ref.extFile); lf = lineFileOpen(path, TRUE); extFileId = ref.extFile; } lineFileSeek(lf, ref.offset, SEEK_SET); axt = axtRead(lf); if (axt == NULL) internalErr(); maf = mafFromAxt(axt, tSize, tPrefix, hashIntVal(qSizeHash, axt->qName), qPrefix); axtFree(&axt); slAddHead(&mafList, maf); } sqlFreeResult(&sr); lineFileClose(&lf); slReverse(&mafList); return mafList; }
void axtQueryCount(char *fileName) /* axtQueryCount - Count bases covered on each query sequence. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); struct axt *axt; struct qInfo *qList = NULL, *q; while ((axt = axtRead(lf)) != NULL) { char *qName = axt->qName; if ((q = hashFindVal(hash, qName)) == NULL) { AllocVar(q); slAddHead(&qList, q); hashAddSaveName(hash, qName, q, &q->name); } q->covered += axt->qEnd - axt->qStart; axtFree(&axt); } slSort(&qList, qInfoCmpName); for (q = qList; q != NULL; q = q->next) printf("%s\t%d\n", q->name, q->covered); }
void ggcChrom(struct chromGenes *chrom, char *axtFile, struct ggcInfo *g, struct hash *restrictHash, FILE *fParts)
/* Tabulate matches on chromosome.
 *   chrom        - chromosome with its size and gene list
 *   axtFile      - alignments for this chromosome; target must be plus
 *                  strand and lie inside chrom->size, else the call aborts
 *   g            - accumulators that receive all tallies
 *   restrictHash - if non-NULL, only genes whose name is in it are tallied
 *   fParts       - if non-NULL, receives one line per gene used
 * First builds per-base hit/cover arrays from the alignments, then walks
 * chrom->geneList tallying UTRs, CDS, up/downstream regions, transcription
 * and translation ends, coding exons, introns and splice sites. */
{
struct lineFile *lf = lineFileOpen(axtFile, TRUE);
bool *hits, *covers;
/* NOTE(review): hitCount is incremented but never read in this function,
 * and coverCount is never used at all. */
int hitCount = 0, coverCount = 0;
struct axt *axt;
struct genePred *gp;
int closeSize = g->closeSize;
int closeHalf = closeSize/2;

/* Build up array of booleans - one per base - which are
 * 1's where mouse/human align and bases match, zero
 * elsewhere. */
AllocArray(hits, chrom->size);
AllocArray(covers, chrom->size);
printf("%s (%d bases)\n", chrom->name, chrom->size);
while ((axt = axtRead(lf)) != NULL)
    {
    int tPos = axt->tStart;
    int symCount = axt->symCount, i;
    char t, q, *tSym = axt->tSym, *qSym = axt->qSym;

    if (axt->tEnd > chrom->size)
        errAbort("tEnd %d, chrom size %d in %s",
                axt->tEnd, chrom->size, axtFile);
    if (axt->tStrand == '-')
        errAbort("Can't handle minus strand on target in %s", axtFile);
    /* Walk the alignment columns; tPos only advances on columns that
     * contain a target base. */
    for (i=0; i<symCount; ++i)
        {
        t = tSym[i];
        if (t != '-')
            {
            q = qSym[i];
            /* Case-insensitive base match. */
            if (toupper(t) == toupper(q))
                {
                hits[tPos] = TRUE;
                ++hitCount;
                }
            /* covers is 1 where the query has a gap opposite the target
             * base and 2 where it has a base. */
            if (q == '-')
                covers[tPos] = 1;
            else
                covers[tPos] = 2;
            ++tPos;
            }
        }
    axtFree(&axt);
    }

for (gp = chrom->geneList; gp != NULL; gp = gp->next)
    {
    int exonIx;
    int utr3Size = 0, utr5Size = 0, cdsAllSize = 0;
    int utr3Pos = 0, utr5Pos = 0, cdsAllPos = 0;
    bool *utr3Hits = NULL, *utr3Covers = NULL;
    bool *utr5Hits = NULL, *utr5Covers = NULL;
    bool *cdsAllHits = NULL, *cdsAllCovers = NULL;
    bool isRev = (gp->strand[0] == '-');

    /* Filter out genes not in restrict hash if any. */
    /* totalGenes, reviewedGenes and genesUsed are counters defined outside
     * this function. */
    ++totalGenes;
    if (restrictHash != NULL)
        if (!hashLookup(restrictHash, gp->name))
            continue;
    ++reviewedGenes;

    /* Filter out genes without meaningful UTRs */
    /* Both the region before cdsStart and the region after cdsEnd must be
     * at least closeSize/2 long. */
    if (gp->cdsStart - gp->txStart < g->closeSize/2 ||
        gp->txEnd - gp->cdsEnd < g->closeSize/2)
        continue;
    ++genesUsed;

    /* Total up UTR and CDS sizes. 
 */
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        /* NOTE(review): eSize and oneUtr are not used in this loop. */
        int eSize = eEnd - eStart;
        int oneUtr, oneCds;
        oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
        if (oneCds > 0)
            {
            cdsAllSize += oneCds;
            }
        /* Exon part left of cdsStart: 3' UTR on minus strand, else 5'. */
        if (eStart < gp->cdsStart)
            {
            int utrStart = eStart;
            int utrEnd = min(gp->cdsStart, eEnd);
            int utrSize = utrEnd - utrStart;
            if (isRev)
                utr3Size += utrSize;
            else
                utr5Size += utrSize;
            }
        /* Exon part right of cdsEnd: 5' UTR on minus strand, else 3'. */
        if (eEnd > gp->cdsEnd)
            {
            int utrStart = max(gp->cdsEnd, eStart);
            int utrEnd = eEnd;
            int utrSize = utrEnd - utrStart;
            if (isRev)
                utr5Size += utrSize;
            else
                utr3Size += utrSize;
            }
        }

    /* Condense hits from UTRs and CDSs */
    if (utr5Size > 0)
        {
        AllocArray(utr5Hits, utr5Size);
        AllocArray(utr5Covers, utr5Size);
        }
    if (utr3Size > 0)
        {
        AllocArray(utr3Hits, utr3Size);
        AllocArray(utr3Covers, utr3Size);
        }
    if (cdsAllSize > 0)
        {
        AllocArray(cdsAllHits, cdsAllSize);
        AllocArray(cdsAllCovers, cdsAllSize);
        }
    /* Second pass over the exons, using the same partition as the sizing
     * pass above, copying the per-base flags into the condensed arrays. */
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        /* NOTE(review): eSize and oneUtr are not used in this loop. */
        int eSize = eEnd - eStart;
        int oneUtr, oneCds;
        oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
        if (oneCds > 0)
            {
            int cdsStart = eStart;
            /* NOTE(review): cdsEnd is assigned but never read. */
            int cdsEnd = gp->cdsEnd;
            if (cdsStart < gp->cdsStart)
                cdsStart = gp->cdsStart;
            memcpy(cdsAllHits + cdsAllPos, hits + cdsStart, oneCds * sizeof(*hits));
            memcpy(cdsAllCovers + cdsAllPos, covers + cdsStart, oneCds * sizeof(*covers));
            cdsAllPos += oneCds;
            }
        if (eStart < gp->cdsStart)
            {
            int utrStart = eStart;
            int utrEnd = min(gp->cdsStart, eEnd);
            int utrSize = utrEnd - utrStart;
            if (isRev)
                {
                memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr3Pos += utrSize;
                }
            else
                {
                memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr5Pos += utrSize;
                }
            }
        if (eEnd > gp->cdsEnd)
            {
            int utrStart = max(gp->cdsEnd, eStart);
            int utrEnd = eEnd;
            int utrSize = utrEnd - utrStart;
            if (isRev)
                {
                memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr5Pos += utrSize;
                }
            else
                {
                memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr3Pos += utrSize;
                }
            }
        }
    /* The copy pass must have filled exactly what the sizing pass counted. */
    assert(utr3Pos == utr3Size);
    assert(utr5Pos == utr5Size);
    assert(cdsAllPos == cdsAllSize);
    tallyHits(&g->utr5, utr5Hits, utr5Covers, utr5Size, isRev);
    tallyHits(&g->utr3, utr3Hits, utr3Covers, utr3Size, isRev);
    tallyHits(&g->cdsAll, cdsAllHits, cdsAllCovers, cdsAllSize, isRev);

    /* Optionally write out file with gene by gene info. */
    if (fParts != NULL)
        {
        /* Write header line first time through. */
        /* The flag is static, so the header is written once per process,
         * not once per call. */
        static boolean firstTime = TRUE;
        if (firstTime)
            {
            firstTime = FALSE;
            fprintf(fParts, "#accession\tsize_5\tali_5\tmatch_5\tsize_c\tali_c\tmatch_c\tsize_3\tali_3\tmatch_3\n");
            }
        fprintf(fParts, "%s\t", gp->name);
        fprintf(fParts, "%d\t%d\t%d\t", utr5Size,
                countBools(utr5Covers, utr5Size),
                countBools(utr5Hits, utr5Size));
        fprintf(fParts, "%d\t%d\t%d\t", cdsAllSize,
                countBools(cdsAllCovers, cdsAllSize),
                countBools(cdsAllHits, cdsAllSize));
        fprintf(fParts, "%d\t%d\t%d\n", utr3Size,
                countBools(utr3Covers, utr3Size),
                countBools(utr3Hits, utr3Size));
        }

    /* Tally upstream/downstream hits. 
 */
        {
        /* Windows of closeSize bases centred on txStart and txEnd. */
        int s1 = gp->txStart - closeHalf;
        int e1 = s1 + closeSize;
        int s2 = gp->txEnd - closeHalf;
        int e2 = s2 + closeSize;
        if (isRev)
            {
            /* On the minus strand the roles of the two ends are swapped. */
            tallyInRange(&g->down, hits, covers, chrom->size, gp->txStart - g->baseDown, gp->txStart, isRev);
            tallyInRange(&g->up, hits, covers, chrom->size, gp->txEnd, gp->txEnd + g->baseUp, isRev);
            tallyInRange(&g->txEnd, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->txStart, hits, covers, chrom->size, s2, e2, isRev);
            }
        else
            {
            tallyInRange(&g->up, hits, covers, chrom->size, gp->txStart - g->baseUp, gp->txStart, isRev);
            tallyInRange(&g->down, hits, covers, chrom->size, gp->txEnd, gp->txEnd + g->baseDown, isRev);
            tallyInRange(&g->txStart, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->txEnd, hits, covers, chrom->size, s2, e2, isRev);
            }
        }

    /* Tally hits in coding exons */
    for (exonIx=0; exonIx < gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        /* Single coding exon. */
        if (eStart <= gp->cdsStart && eEnd >= gp->cdsEnd)
            {
            eStart = gp->cdsStart;
            eEnd = gp->cdsEnd;
            tallyInRange(&g->cdsSingle, hits, covers, chrom->size, eStart, eEnd, isRev);
            }
        /* Initial coding exon */
        else if (eStart < gp->cdsStart && eEnd > gp->cdsStart)
            {
            /* closeSize window centred on cdsStart. */
            int cs = gp->cdsStart - closeHalf;
            int ce = cs + closeSize;
            eStart = gp->cdsStart;
            if (isRev)
                {
                tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsLast, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            else
                {
                tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsFirst, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            }
        /* Final coding exon */
        else if (eStart < gp->cdsEnd && eEnd > gp->cdsEnd)
            {
            /* closeSize window centred on cdsEnd. */
            int cs = gp->cdsEnd - closeHalf;
            int ce = cs + closeSize;
            eEnd = gp->cdsEnd;
            if (isRev)
                {
                tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsFirst, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            else
                {
                tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsLast, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            }
        /* Middle (but not only) coding exon */
        else if (eStart >= gp->cdsStart && eEnd <= gp->cdsEnd)
            {
            tallyInRange(&g->cdsMiddle, hits, covers, chrom->size, eStart, eEnd, isRev);
            }
        else
            {
            /* Exons matching none of the cases above are not tallied. */
            }
        }

    /* Tally hits in introns and splice sites. */
    for (exonIx=1; exonIx<gp->exonCount; ++exonIx)
        {
        /* Intron between exon exonIx-1 and exon exonIx, with closeSize
         * windows centred on each of its ends. */
        int iStart = gp->exonEnds[exonIx-1];
        int iEnd = gp->exonStarts[exonIx];
        int s1 = iStart - closeHalf;
        int e1 = s1 + closeSize;
        int s2 = iEnd - closeHalf;
        int e2 = s2 + closeSize;
        if (isRev)
            {
            tallyInRange(&g->splice3, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->splice5, hits, covers, chrom->size, s2, e2, isRev);
            }
        else
            {
            tallyInRange(&g->splice5, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->splice3, hits, covers, chrom->size, s2, e2, isRev);
            }
        tallyInRange(&g->intron, hits, covers, chrom->size, iStart, iEnd, isRev);
        }
    freez(&utr5Hits);
    freez(&utr3Hits);
    freez(&cdsAllHits);
    freez(&utr5Covers);
    freez(&utr3Covers);
    freez(&cdsAllCovers);
    }
freez(&hits);
freez(&covers);
lineFileClose(&lf);
}
void twinOrfStats(char *axtFile, char *raFile, char *outFile) /* twinOrfStats - Collect stats on refSeq cDNAs aligned to another species via axtForEst. */ { struct hash *rsiHash = readRefRa(raFile); struct lineFile *lf = lineFileOpen(axtFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct axt *axt; static struct c1Counts c1Kozak[10], c1all, c1utr5, c1utr3, c1cds; static struct c2Counts c2Kozak[10], c2All, c2Utr5, c2Utr3, c2Cds; static struct c3Counts c3All, c3Utr5, c3Utr3, c3Cds; char label[64]; char *predictFile = optionVal("predict", NULL); int i; static struct c3Counts cod1, cod2, cod3, stop, earlyCod1, earlyCod2, earlyCod3; int earlySize; initC3Counts(&cod1, 0); initC3Counts(&cod2, 0); initC3Counts(&cod3, 0); initC3Counts(&earlyCod1, 0); initC3Counts(&earlyCod2, 0); initC3Counts(&earlyCod3, 0); initC3Counts(&c3Utr3, 0); initC3Counts(&c3Utr5, 0); initC3Counts(&stop, 0); threshold = optionFloat("threshold", threshold); earlyAaSize = optionInt("earlyAaSize", earlyAaSize); earlySize = 3*earlyAaSize; while ((axt = axtRead(lf)) != NULL) { struct refSeqInfo *rsi = hashFindVal(rsiHash, axt->tName); if (rsi != NULL && rsi->cdsStart >= 6) { if (checkAtg(axt, rsi->cdsStart)) { for (i=0; i<10; ++i) addPos(&c1Kozak[i], &c2Kozak[i], axt, rsi->cdsStart - 5 + i); addRange(&c1all, &c2All, &c3All, axt, 0, rsi->size); addRange(&c1utr5, &c2Utr5, &c3Utr5, axt, 0, rsi->cdsStart); addRange(&c1cds, &c2Cds, &c3Cds, axt, rsi->cdsStart, rsi->cdsEnd); addRange(&c1utr3, &c2Utr3, &c3Utr3, axt, rsi->cdsEnd, rsi->size); /* The +3+1 in the expression below breaks down as so: the * +3 is to move past the first 'ATG' codon, which is part of * the Kozak consensus model, not the coding model. The +1 * is so that we look at the 2nd and 3rd bases of the previous * codon, and the first base of the current codon. 
*/ addCodons(&earlyCod1, axt, rsi->cdsStart+3+1, rsi->cdsStart+1+earlySize); addCodons(&earlyCod2, axt, rsi->cdsStart+3+2, rsi->cdsStart+2+earlySize); addCodons(&earlyCod3, axt, rsi->cdsStart+3+3, rsi->cdsStart+3+earlySize); addCodons(&cod1, axt, rsi->cdsStart+3+1+earlySize, rsi->cdsEnd-5); addCodons(&cod2, axt, rsi->cdsStart+3+2+earlySize, rsi->cdsEnd-4); addCodons(&cod3, axt, rsi->cdsStart+3+3+earlySize, rsi->cdsEnd-3); addCodons(&stop, axt, rsi->cdsEnd-3, rsi->cdsEnd); } } axtFree(&axt); } lineFileClose(&lf); dumpC1(f, &c1all, "c1_all"); dumpC2(f, &c2All, "c2_all"); dumpC3(f, &c3All, "c3_all"); dumpC1(f, &c1utr5, "c1_utr5"); dumpC2(f, &c2Utr5, "c2_utr5"); dumpC3(f, &c3Utr5, "c3_utr5"); dumpC1(f, &c1cds, "c1_cds"); dumpC2(f, &c2Cds, "c2_cds"); dumpC3(f, &c3Cds, "c3_cds"); dumpC1(f, &c1utr3, "c1_utr3"); dumpC2(f, &c2Utr3, "c2_utr3"); dumpC3(f, &c3Utr3, "c3_utr3"); for (i=0; i<10; ++i) { sprintf(label, "c1_kozak[%d]", i-5); dumpC1(f, &c1Kozak[i], label); sprintf(label, "c2_kozak[%d]", i-5); dumpC2(f, &c2Kozak[i], label); } dumpC3(f, &earlyCod1, "earlyCod1"); dumpC3(f, &earlyCod2, "earlyCod2"); dumpC3(f, &earlyCod3, "earlyCod3"); dumpC3(f, &cod1, "cod1"); dumpC3(f, &cod2, "cod2"); dumpC3(f, &cod3, "cod3"); dumpC3(f, &stop, "stop"); if (predictFile) { predict(c1Kozak, &c1all, axtFile, predictFile, rsiHash); } }
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
/* For every pseudoGeneLink row in bedFileName: collect mismatches between
 * the retro locus and its parent from the first suitable self chain, align
 * the mRNAs overlapping the retro locus with an external pslPretty run, and
 * write to outFile, per mRNA, how many mismatch positions agree with the
 * parent, with the retro copy, or with neither.
 *   bedFileName - input rows loaded with pseudoGeneLinkLoad()
 *   tNibDir     - directory passed to nibInfoFromCache()
 *   nibList     - passed to the pslPretty command line
 * Uses nibHash, mrnaHash, chainHash, twoBitFile, maxGap and outFile, which
 * are defined outside this function. */
{
struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL;
char *row[PSEUDOGENELINK_NUM_COLS] ;
struct pseudoGeneLink *ps;
char *tmpName[512], cmd[512];
struct axt *axtList = NULL, *axt, *mAxt = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL;
struct nibInfo *qNib = NULL, *tNib = NULL;
FILE *op;
int ret;

if (nibHash == NULL)
    nibHash = hashNew(0);
while (lineFileNextRow(bf, row, ArraySize(row)))
    {
    struct misMatch *misMatchList = NULL;
    struct binKeeper *bk = NULL;
    struct binElement *el, *elist = NULL;
    struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ;
    struct misMatch *mf = NULL;
    ps = pseudoGeneLinkLoad(row);
    tmpName[0] = cloneString(ps->name);
    /* NOTE(review): sizeof(tmpName) is the array size in bytes, not its
     * element count (ArraySize) - confirm which chopByChar() expects. */
    chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName));
    verbose(2,"name %s %s:%d-%d\n",
            ps->name, ps->chrom, ps->chromStart,ps->chromEnd);
    /* get expressed retro from hash */
    /* NOTE(review): bk is used without a NULL check here and in the
     * lookups below - confirm the hashes always hold the chromosome. */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        rPsl = el->val;
        verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd);
        }
    /* find mrnas that overlap parent gene */
    bk = hashFindVal(mrnaHash, ps->gChrom);
    elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        pPsl = el->val;
        verbose(2,"parent %s %s:%d %d,%d\n",
                pPsl->qName, pPsl->tName,pPsl->tStart,
                pPsl->match, pPsl->misMatch);
        }
    /* find self chain */
    bk = hashFindVal(chainHash, ps->chrom);
    elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ;
    slSort(&elist, chainCmpScoreDesc);
    /* Only the first chain that passes the parent test is processed; the
     * loop body ends in a break. */
    for (el = elist; el != NULL ; el = el->next)
        {
        struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2;
        int qs = chain->qStart;
        int qe = chain->qEnd;
        int id = chain->id;
        /* Convert minus strand query coordinates to the plus strand. */
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        /* Skip chains whose query side is not on the parent gene. */
        if (!sameString(chain->qName , ps->gChrom) ||
                !positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd))
            {
            verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n",
                chain->qName, qs, qe,
                chain->tName,chain->tStart,chain->tEnd,
                ps->gChrom,ps->gStart,ps->gEnd);
            continue;
            }
        verbose(2,"chain id %d %4.0f",chain->id, chain->score);
        /* Trim the chain to the retro locus, pulled in by 7 bases on each
         * side, then to the parent gene range on the query side. */
        chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7,
            &subChain, &retChainToFree);
        if (subChain != NULL)
            chain = subChain;
        chainSubsetOnQ(chain, ps->gStart, ps->gEnd,
            &subChain, &retChainToFree2);
        if (subChain != NULL)
            chain = subChain;
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        verbose(2," %s:%d-%d %s:%d-%d ",
                chain->qName, qs, qe,
                chain->tName,chain->tStart,chain->tEnd);
        if (subChain != NULL)
            verbose(2,"subChain %s:%d-%d %s:%d-%d\n",
                    subChain->qName, subChain->qStart, subChain->qEnd,
                    subChain->tName,subChain->tStart,subChain->tEnd);
        /* Load both sequences of the chain and turn it into alignments. */
        qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName);
        tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName);
        tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+');
        qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand);
        axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart,
                maxGap, BIGNUM);
        verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList));
        for (axt = axtList; axt != NULL ; axt = axt->next)
            {
            addMisMatch(&misMatchList, axt, chain->qSize);
            }
        verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id);
        chainFree(&retChainToFree);
        chainFree(&retChainToFree2);
        break;
        }
    /* create axt of each expressed retroGene to parent gene */
    /* get alignment for each mrna overlapping retroGene */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ;
        {
        /* Scratch files live under /tmp and are named by chromosome only. */
        char queryName[512];
        char axtName[512];
        char pslName[512];
        safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom);
        safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom);
        safef(pslName, sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom);
        /* NOTE(review): the fopen() result is not checked before use. */
        op = fopen(pslName,"w");
        for (el = elist ; el != NULL ; el = el->next)
            {
            psl = el->val;
            pslOutput(psl, op, '\t','\n');
            qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);

            if (qSeq != NULL)
                slAddHead(&seqList, qSeq);
            else
                errAbort("seq %s not found \n", psl->qName);
            }
        fclose(op);
        /* NOTE(review): seqList is declared outside the row loop and is not
         * reset here, so it appears to keep growing across rows - confirm
         * this is intended. */
        faWriteAll(queryName, seqList);
        /* NOTE(review): the command is assembled from file names and run
         * through the shell; confirm these inputs are trusted. */
        safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName);
        ret = system(cmd);
        if (ret != 0)
            errAbort("ret is %d %s\n",ret,cmd);
        verbose(2, "ret is %d %s\n",ret,cmd);
        af = lineFileOpen(axtName, TRUE);
        while ((axt = axtRead(af)) != NULL)
            slAddHead(&mAxt, axt);
        lineFileClose(&af);
        }
    slReverse(&mAxt);
    /* for each parent/retro pair, count bases matching retro and parent better */
    /* The loop walks elist and mAxt in step, assuming the pslPretty output
     * has one alignment per psl in the same order (checked by the assert). */
    for (el = elist; el != NULL ; el = el->next)
        {
        int i, scoreRetro=0, scoreParent=0, scoreNeither=0;
        struct dyString *parentMatch = newDyString(16*1024);
        struct dyString *retroMatch = newDyString(16*1024);
        mPsl = el->val;

        if (mAxt != NULL)
            {
            verbose(2,"mrna %s %s:%d %d,%d axt %s\n",
                    mPsl->qName, mPsl->tName,mPsl->tStart,
                    mPsl->match, mPsl->misMatch,
                    mAxt->qName);
            assert(sameString(mPsl->qName, mAxt->qName));
            for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++)
                {
                /* NOTE(review): qSym/tSym are indexed by target offset
                 * (j+i), which only lines up with alignment columns if the
                 * alignment has no gaps - confirm. */
                int j = mAxt->tStart - mPsl->tStart;
                /* NOTE(review): slCount() receives the address of the list
                 * pointer here, unlike the slCount(misMatchList) calls
                 * above - confirm. */
                verbose(5, "listLen = %d\n",slCount(&misMatchList));
                if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL)
                    {
                    if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match retro[%d] %d %c == %c parent %c %d\n",
                                i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i],
                                mf->parentBase, mf->parentLoc);
                        dyStringPrintf(retroMatch, "%d,", mf->retroLoc);
                        scoreRetro++;
                        }
                    else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match parent[%d] %d %c == %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i],
                                mf->retroBase, mf->retroLoc);
                        dyStringPrintf(parentMatch, "%d,", mf->parentLoc);
                        scoreParent++;
                        }
                    else
                        {
                        verbose (3,"match neither[%d] %d %c != %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i],
                                mf->retroBase, mf->retroLoc);
                        scoreNeither++;
                        }
                    }
                }
            verbose(2,"final score %s parent %d retro %d neither %d\n",
                    mPsl->qName, scoreParent, scoreRetro, scoreNeither);
            fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n",
                    ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score,
                    mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName,
                    scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string);
            /* NOTE(review): advancing mAxt drops the only reference to the
             * consumed alignment, which is never freed - confirm. */
            mAxt = mAxt->next;
            }
        dyStringFree(&parentMatch);
        dyStringFree(&retroMatch);
        }
    }
}
void liftAxt(char *destFile, struct hash *liftHash, int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .axt file.
 *   destFile    - output file; opened for writing here and released before returning
 *   liftHash    - lift specifications, consulted per record through findLift()
 *   sourceCount - number of entries in sources
 *   sources     - names of the .axt files to lift, processed in order
 *   querySide   - TRUE to lift the query side of each record, FALSE for the target side
 * A record with no lift spec is dropped unless the file-level 'how' setting is
 * carryMissing, in which case it is written out unchanged. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct axt *axt;

    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((axt = axtRead(lf)) != NULL)
        {
        /* Edit a shallow copy: a.qName/a.tName may be pointed at spec->newName,
         * while the record handed to axtFree() keeps its own pointers. */
        struct axt a = *axt;
        char *seqName = (querySide ? a.qName : a.tName);
        struct liftSpec *spec = findLift(liftHash, seqName, lf);

        if (spec == NULL)
            {
            if (how != carryMissing)
                {
                axtFree(&axt);
                continue;
                }
            }
        else
            {
            int offset;
            char strand = (querySide ? a.qStrand : a.tStrand);

            cantHandleSpecRevStrand(spec);
            if (strand == '-')
                {
                /* Minus-strand shift is measured from the far end of the new
                 * sequence (presumably because the coordinates are relative to
                 * the reverse strand - confirm against the axt format). */
                int ctgEnd = spec->offset + spec->oldSize;
                offset = spec->newSize - ctgEnd;
                }
            else
                offset = spec->offset;

            if (querySide)
                {
                a.qStart += offset;
                a.qEnd += offset;
                a.qName = spec->newName;
                }
            else
                {
                a.tStart += offset;
                a.tEnd += offset;
                a.tName = spec->newName;
                if (strand == '-')
                    warn("Target minus strand, please double check results.");
                }
            }
        axtWrite(&a, f);
        axtFree(&axt);
        doDots(&dotMod);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }

/* Release the output handle so buffered data is written out and close-time
 * errors are reported.  NOTE(review): mustOpen presumably can hand back stdout
 * for a destination named "stdout" (confirm against the library); in that case
 * only flush, so later output from the program still works. */
if (f == stdout)
    fflush(f);
else if (fclose(f) != 0)
    warn("Error closing %s", destFile);
}
void axtHiQualDiffs(char *axtFile, struct hash *qacHash, FILE *f) /* Write out high quality diffs in axtFile to f. */ { char *qName = cloneString(""); UBYTE *qQuals = NULL; UBYTE *quals = NULL; struct qac *qac = NULL; struct axt *axt = NULL; struct lineFile *lf = lineFileOpen(axtFile, TRUE); int qStart, qDir, qPos, qWinStart, qWinEnd, tPos; int qWinSize = optionInt("winSize", 11); int qQualMin = optionInt("diffQualMin", 30); int qWinQualMin = optionInt("winQualMin", 25); int qWinMaxDiff = optionInt("winMaxDiff", 2); boolean qIndelOk = optionExists("indelOk"); boolean qIgnore98 = optionExists("ignore98"); boolean chimpPos = optionExists("chimpPos"); int qHalfWinSize = qWinSize/2; while ((axt = axtRead(lf)) != NULL) { char *qSym = axt->qSym, *tSym = axt->tSym; int symIx, symCount = axt->symCount; char qc,tc; toUpperN(qSym, symCount); toUpperN(tSym, symCount); if (!sameString(axt->qName, qName)) { freez(&qName); qName = cloneString(axt->qName); qac = hashMustFindVal(qacHash, qName); freez(&qQuals); qQuals = needHugeMem(qac->uncSize); rleUncompress(qac->data, qac->compSize, qQuals, qac->uncSize); } if (axt->qStrand == '+') { qStart = axt->qStart; qDir = 1; } else { qStart = qac->uncSize - axt->qStart - 1; qDir = -1; } qPos = qStart; tPos = axt->tStart; for (symIx = 0; symIx < symCount; ++symIx) { qc = qSym[symIx]; tc = tSym[symIx]; if (qc == '-') tPos += 1; else if (tc == '-') qPos += qDir; else { if (qc != tc) { qWinStart = qPos - qHalfWinSize; qWinEnd = qWinStart + qWinSize; if (qWinStart >= 0 && qWinEnd < qac->uncSize) { if (qQuals[qPos] >= qQualMin) { int i; boolean ok = TRUE; for (i = qWinStart; i<qWinEnd; ++i) if (qQuals[i] < qWinQualMin) { ok = FALSE; break; } if (ok) { int diffCount = 0; int symWinStart = symIx - qHalfWinSize; int symWinEnd = symWinStart + qWinSize; for (i=symWinStart; i < symWinEnd; ++i) { qc = qSym[i]; tc = tSym[i]; if (qc == '-' || tc == '-') { ok = FALSE; break; } if (qc != tc) ++diffCount; } if (ok && diffCount <= qWinMaxDiff && 
(!qIgnore98 || qQuals[qPos] != 98) ) { if (chimpPos) fprintf(f, "%s\t%d\t%d\t%c\t%c\t%s\t%d\t%d\n", axt->tName, tPos, tPos+1, tSym[symIx], qSym[symIx], axt->qName, qPos, qPos+1); else fprintf(f, "%s\t%d\t%d\t%c\t%c\n", axt->tName, tPos, tPos+1, tSym[symIx], qSym[symIx]); } } } } } qPos += qDir; tPos += 1; } } axtFree(&axt); } lineFileClose(&lf); }
void axtCalcMatrix(int fileCount, char *files[]) /* axtCalcMatrix - Calculate substitution matrix and make indel histogram. */ { int *histIns, *histDel, *histPerfect, *histGapless, *histT, *histQ; int maxInDel = optionInt("maxInsert", 21); static int matrix[4][4]; static char bestGapless[256], bestPerfect[256]; int i, j, total = 0; double scale; int fileIx; struct axt *axt; static int trans[4] = {A_BASE_VAL, C_BASE_VAL, G_BASE_VAL, T_BASE_VAL}; static char *bases[4] = {"A", "C", "G", "T"}; int totalT = 0, totalMatch = 0, totalMismatch = 0, tGapStart = 0, tGapExt=0, qGapStart = 0, qGapExt = 0; AllocArray(histIns, maxInDel+1); AllocArray(histDel, maxInDel+1); AllocArray(histPerfect, maxPerfect+1); AllocArray(histGapless, maxPerfect+1); AllocArray(histT, maxInDel+1); AllocArray(histQ, maxInDel+1); for (fileIx = 0; fileIx < fileCount; ++fileIx) { char *fileName = files[fileIx]; struct lineFile *lf = lineFileOpen(fileName, TRUE); while ((axt = axtRead(lf)) != NULL) { totalT += axt->tEnd - axt->tStart; addMatrix(matrix, axt->tSym, axt->qSym, axt->symCount); addInsert(histIns, maxInDel, axt->tSym, axt->symCount, &tGapStart, &tGapExt); addInsert(histDel, maxInDel, axt->qSym, axt->symCount, &qGapStart, &qGapExt); addPerfect(axt, histPerfect, maxPerfect, axt->qSym, axt->tSym, axt->symCount, bestPerfect); addGapless(axt, histGapless, maxPerfect, axt->qSym, axt->tSym, axt->symCount, bestGapless); axtFree(&axt); } lineFileClose(&lf); } printf(" "); for (i=0; i<4; ++i) printf("%5s ", bases[i]); printf("\n"); for (i=0; i<4; ++i) { for (j=0; j<4; ++j) { int one = matrix[i][j]; total += matrix[i][j]; if (i == j) totalMatch += one; else totalMismatch += one; } } scale = 1.0 / total; for (i=0; i<4; ++i) { int it = trans[i]; printf(" %s", bases[i]); for (j=0; j<4; ++j) { int jt = trans[j]; printf(" %5.4f", matrix[it][jt] * scale); } printf("\n"); } printf("\n"); for (i=1; i<21; ++i) { if (i == 20) printf(">="); printf("%2d %6.4f%% %6.4f%%\n", i, 100.0*histIns[i]/totalT, 
100.0*histDel[i]/totalT); } #ifdef OLD for (i=0; i<100; i += 10) { int delSum = 0, insSum=0, perfectSum = 0, perfectBaseSum = 0; for (j=0; j<10; ++j) { int ix = i+j; insSum += histIns[ix]; delSum += histDel[ix]; perfectSum += histPerfect[ix]; perfectBaseSum += histPerfect[ix] * ix; } printf("%2d to %2d: %6.4f%% %6.4f%% %6d %7d\n", i, i+9, 100.0*insSum/totalT, 100.0*delSum/totalT, perfectSum, perfectBaseSum); } for (i=0; i<1000; i += 100) { int delSum = 0, insSum=0, perfectSum = 0, perfectBaseSum = 0; for (j=0; j<100; ++j) { int ix = i+j; int ins = histIns[ix]; int del = histDel[ix]; both = ins + del; insSum += ins; delSum += del; perfectSum += histPerfect[ix]; perfectBaseSum += histPerfect[ix] * ix; } printf("%3d to %3d: %6.4f%% %6.4f%% %6d %7d\n", i, i+99, 100.0*insSum/totalT, 100.0*delSum/totalT, perfectSum, perfectBaseSum); } printf(">1000 %6.4f%% %6.4f%% %6d %7d\n", 100.0*histIns[1000]/totalT, 100.0*histDel[1000]/totalT, histPerfect[1000], histPerfect[1000]*1000); both = histIns[1000] + histDel[1000]; #endif /* OLD */ printf("\n"); printMedianEtc("perfect", histPerfect, maxPerfect, bestPerfect); printMedianEtc("gapless", histGapless, maxPerfect, bestGapless); printf("\n"); printLabeledPercent("totalT: ", totalT, totalT); printLabeledPercent("matches: ", totalMatch, totalT); printLabeledPercent("mismatches:", totalMismatch, totalT); printLabeledPercent("tGapStart: ", tGapStart, totalT); printLabeledPercent("qGapStart: ", qGapStart, totalT); printLabeledPercent("tGapExt: ", tGapExt, totalT); printLabeledPercent("qGapExt: ", qGapExt, totalT); printLabeledPercent("baseId: ", totalMatch, totalMatch+totalMismatch); }