struct axt *createAxtGap(char *nibFile, char *chrom, int start, int end, char strand) /* return an axt alignment with the query all deletes - null aligment */ { struct axt *axt; int size = end-start; char *gapPt = needLargeMem(size+1); char *p; struct dnaSeq *seq = NULL; for (p=gapPt;p<=gapPt+size;p++) *p = '-'; AllocVar(axt); axt->tName = chrom; axt->tStart = start; axt->tEnd = end; axt->tStrand = strand; axt->qName = "gap"; axt->qStart = 1; axt->qEnd = size; axt->qStrand = strand; axt->symCount = size; axt->score = 0; seq = nibLoadPart(nibFile, start,size); axt->tSym = cloneMem(seq->dna, size+1); axt->qSym = cloneMem(gapPt, size+1); return axt; }
struct bed *bedFromGenePred(struct genePred *genePred) /* Convert a single genePred to a bed structure */ { struct bed *bed; int i, blockCount, *chromStarts, *blockSizes, chromStart; /* A tiny bit of error checking on the genePred. */ if (genePred->txStart >= genePred->txEnd || genePred->cdsStart > genePred->cdsEnd) { errAbort("mangled genePred format for %s", genePred->name); } /* Allocate bed and fill in from psl. */ AllocVar(bed); bed->chrom = cloneString(genePred->chrom); bed->chromStart = chromStart = genePred->txStart; bed->chromEnd = genePred->txEnd; bed->thickStart = genePred->cdsStart; bed->thickEnd = genePred->cdsEnd; bed->score = 0; strncpy(bed->strand, genePred->strand, sizeof(bed->strand)); bed->blockCount = blockCount = genePred->exonCount; bed->blockSizes = blockSizes = (int *)cloneMem(genePred->exonEnds,(sizeof(int)*genePred->exonCount)); bed->chromStarts = chromStarts = (int *)cloneMem(genePred->exonStarts, (sizeof(int)*genePred->exonCount)); bed->name = cloneString(genePred->name); /* Convert coordinates to relative and exnosEnds to blockSizes. */ for (i=0; i<blockCount; ++i) { blockSizes[i] -= chromStarts[i]; chromStarts[i] -= chromStart; } return bed; }
bam1_t *bamClone(const bam1_t *bam) /* Return a newly allocated copy of bam. */ { // Using typecasts to get around compiler complaints about bam being const: bam1_t *newBam = cloneMem((void *)bam, sizeof(*bam)); newBam->data = cloneMem((void *)bam->data, bam->data_len*sizeof(bam->data[0])); return newBam; }
static void *cloneValues(void *valuesIn, enum asTypes type)
/* Return a copy of valuesIn chosen by type: two doubles for floating types,
 * two long longs for integer types, and a string clone for anything else.
 * A NULL valuesIn yields NULL. */
{
if (valuesIn == NULL)
    return NULL;
if (asTypesIsFloating(type))
    return cloneMem(valuesIn, 2*sizeof(double));
if (asTypesIsInt(type))
    return cloneMem(valuesIn, 2*sizeof(long long));
return cloneString((char *)valuesIn);
}
static struct dnaSeq *faReadAllMixableInLf(struct lineFile *lf,
	boolean isDna, boolean mixed)
/* Read every remaining record from an open fa file and return them as a
 * list in file order.  When mixed is set the mixed-case reader is used and
 * isDna is ignored; otherwise isDna is passed on to the reader.  Each
 * sequence gets its own copy of the name and of size+1 bytes of data. */
{
struct dnaSeq *list = NULL;
DNA *bases;
char *recName;
int baseCount;

while (mixed ? faMixedSpeedReadNext(lf, &bases, &baseCount, &recName)
             : faSomeSpeedReadNext(lf, &bases, &baseCount, &recName, isDna))
    {
    struct dnaSeq *el;
    AllocVar(el);
    el->name = cloneString(recName);
    el->size = baseCount;
    el->dna = cloneMem(bases, baseCount+1);
    slAddHead(&list, el);
    }
/* Elements were pushed on the head, so flip back to file order. */
slReverse(&list);
faFreeFastBuf();
return list;
}
struct bed *pslToBed(struct psl *psl) /* Convert a psl format row of strings to a bed, very similar to customTrack.c::customTrackPsl*/ { struct bed *bed; int i, blockCount, *chromStarts, chromStart; /* A tiny bit of error checking on the psl. */ if (psl->qStart >= psl->qEnd || psl->qEnd > psl->qSize || psl->tStart >= psl->tEnd || psl->tEnd > psl->tSize) { errAbort("mangled psl format for %s", psl->qName); } /* Allocate bed and fill in from psl. */ AllocVar(bed); bed->chrom = cloneString(psl->tName); bed->chromStart = bed->thickStart = chromStart = psl->tStart; bed->chromEnd = bed->thickEnd = psl->tEnd; bed->score = 1000 - 2*pslCalcMilliBad(psl, TRUE); if (bed->score < 0) bed->score = 0; strncpy(bed->strand, psl->strand, sizeof(bed->strand)); bed->blockCount = blockCount = psl->blockCount; bed->blockSizes = (int *)cloneMem(psl->blockSizes,(sizeof(int)*psl->blockCount)); bed->chromStarts = chromStarts = (int *)cloneMem(psl->tStarts, (sizeof(int)*psl->blockCount)); bed->name = cloneString(psl->qName); /* Switch minus target strand to plus strand. */ if (psl->strand[1] == '-') { int chromSize = psl->tSize; reverseInts(bed->blockSizes, blockCount); reverseInts(chromStarts, blockCount); for (i=0; i<blockCount; ++i) chromStarts[i] = chromSize - chromStarts[i]; } /* Convert coordinates to relative. */ for (i=0; i<blockCount; ++i) chromStarts[i] -= chromStart; return bed; }
struct annoRow *annoRowWigNew(char *chrom, uint start, uint end, boolean rightJoinFail,
			      float *values)
/* Build a new annoRow for wiggle data.  chrom is cloned, and data is a clone
 * of the first (end-start) entries of values. */
{
struct annoRow *wigRow;
AllocVar(wigRow);
wigRow->chrom = cloneString(chrom);
wigRow->start = start;
wigRow->end = end;
/* One value per base in start..end. */
wigRow->data = cloneMem(values, (end - start) * sizeof(values[0]));
wigRow->rightJoinFail = rightJoinFail;
return wigRow;
}
struct qaSeq *qaReadNext(struct lineFile *lf) /* Read in next record in .qa file. */ { struct qaSeq *qa, seq; if (!qaFastReadNext(lf, &seq.qa, &seq.size, &seq.name)) return NULL; AllocVar(qa); qa->name = cloneString(seq.name); qa->size = seq.size; qa->qa = cloneMem(seq.qa, seq.size+1); return qa; }
static void fillInQa(char *qaName, struct hash *hash, struct qaSeq *qaList) /* Hash contains qaSeq's with DNA sequence but no * quality info. Fill in quality info from .qa file. */ { struct lineFile *lf = lineFileOpen(qaName, TRUE); struct qaSeq seq; while (qaFastReadNext(lf, &seq.qa, &seq.size, &seq.name)) { seq.qa = cloneMem(seq.qa, seq.size+1); attatchQaInfo(hash, seq.name, seq.qa, seq.size); } lineFileClose(&lf); checkAllPresent(qaList); }
static struct qaSeq *qaFaRead(char *qaName, char *faName, boolean mustReadQa)
/* Read both QA(C) and FA files.  Sequences come from faName; a repeated
 * name draws a warning and only the first occurrence is kept.  If qaName is
 * given, quality info is filled in from it (qac or qa format).  When
 * mustReadQa is FALSE and the file does not exist, a warning is issued and
 * qaMakeFake() is applied to every sequence instead.  Returns the list in
 * file order. */
{
struct qaSeq *list = NULL, *el;
struct hash *nameHash = newHash(0);
struct qaSeq fast;	/* Receives fields from the fast fa reader. */
FILE *faFile = mustOpen(faName, "r");

/* Read in all the .fa files. */
while (faFastReadNext(faFile, &fast.dna, &fast.size, &fast.name))
    {
    if (hashLookup(nameHash, fast.name) == NULL)
	{
	AllocVar(el);
	hashAdd(nameHash, fast.name, el);
	el->name = cloneString(fast.name);
	el->dna = cloneMem(fast.dna, fast.size+1);
	el->size = fast.size;
	slAddHead(&list, el);
	}
    else
	warn("Duplicate %s, ignoring all but first.", fast.name);
    }
fclose(faFile);

/* Read in corresponding .qa files and make sure they correspond.
 * If no file exists then fake it. */
if (qaName)
    {
    if (mustReadQa || fileExists(qaName))
	{
	if (isQacFile(qaName))
	    fillInQac(qaName, nameHash, list);
	else
	    fillInQa(qaName, nameHash, list);
	}
    else
	{
	warn("No quality file %s", qaName);
	for (el = list; el != NULL; el = el->next)
	    qaMakeFake(el);
	}
    }
freeHash(&nameHash);
slReverse(&list);
return list;
}
struct hash *loadChroms(char *dir) /* Load zipped chromosome files into memory. */ { FILE *f; char fastaScan[16]; safef(fastaScan, sizeof(fastaScan), "*.%s", faExtn); struct fileInfo *chromEl, *chromList = listDirX(dir, fastaScan, TRUE); struct hash *chromHash = newHash(0); struct dnaSeq *seq; char chrom[128]; char *faName; int count = 0; verbose(2, "# scanning '%s/%s'\n", dir, fastaScan); for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next) { char *fileName = chromEl->name; splitPath(fileName, NULL, chrom, NULL); chopSuffix(chrom); if (startsWith("chr0", chrom)) /* Convert chr01 to chr1, etc. */ stripChar(chrom, '0'); if (sameString(chrom, "chrmt")) strcpy(chrom, "chr17"); f = fopen(fileName, "r"); AllocVar(seq); seq->name = cloneString(chrom); if (!faFastReadNext(f, &seq->dna, &seq->size, &faName)) errAbort("Couldn't load sequence from %s", fileName); seq->dna = cloneMem(seq->dna, seq->size+1); toUpperN(seq->dna, seq->size); hashAdd(chromHash, chrom, seq); verbose(3, "# loadChrom %s '%s'\n", fileName, chrom); fclose(f); f = NULL; count++; } if (0 == count) errAbort("not fasta files found in '%s/%s'\n", dir, fastaScan); return chromHash; }
char *cloneLongString(char *s)
/* Return a copy of string s, including its terminating zero, made with
 * cloneMem. */
{
return cloneMem(s, strlen(s)+1);
}
struct cutter *readGcg(char *gcgFile) /* Parse a GCG file and load it into cutter format. */ { struct lineFile *lf = lineFileOpen(gcgFile,TRUE); struct cutter *enzList = NULL; char *line = "whatever", *words[10], numWords; /* Skip to the right line. */ while (lineFileNext(lf,&line,NULL) && !startsWith("..",line)); /* */ while ((numWords=lineFileChop(lf,words))) { struct cutter *newone = NULL; int comIx = (numWords==7) ? 5 : 6; int refIx = (numWords==7) ? 6 : 7; int i; char *items[100]; /* Skip ones */ if (words[4][0] == '?') continue; AllocVar(newone); newone->semicolon = (words[0][0] == ';') ? TRUE : FALSE; /* Deal with the first few columns */ if (!isdigit(words[1][0])) errAbort("Error: expecting a number in cut site column on line %d\n", lf->lineIx+1); if (!isdigit(words[3][0]) && words[3][0]!='-') errAbort("Error: expecting a number in the overhang column on line %d\n", lf->lineIx+1); if (words[comIx][0] != '>') errAbort("Error: expecting a \'>\' in the commercial sources column of line %d\n", lf->lineIx+1); newone->name = (words[0][0] == ';') ? 
cloneString(words[0]+1) : cloneString(words[0]); newone->cut = atoi(words[1]); newone->seq = cloneString(words[2]); touppers(newone->seq); stripChar(newone->seq,'\''); stripChar(newone->seq,'_'); newone->size = strlen(newone->seq); newone->matchSize = newone->size - countChars(newone->seq, 'N'); newone->palindromic = isPalindrome(newone->seq); newone->overhang = atoi(words[3]); newone->numCompanies = strlen(words[comIx]+1); if (newone->numCompanies > 0) newone->companies = cloneMem(words[comIx]+1, newone->numCompanies*sizeof(char)); newone->numRefs = chopString(words[refIx], ",", items, ArraySize(items)); AllocArray(newone->refs, newone->numRefs); for (i = 0; i < newone->numRefs; i++) { if (i == 100) errAbort("Error: Andy didn't make the array for holding references big enough\n"); if (!isdigit(items[i][0])) errAbort("Error: expecting number in references column in line %d\n", lf->lineIx+1); newone->refs[i] = atoi(items[i]); } /* Deal with isoscizomers. */ if (numWords == 8) { newone->numSciz = chopString(words[5], ",", items, ArraySize(items)); AllocArray(newone->scizs, newone->numSciz*sizeof(int)); for (i = 0; i < newone->numSciz; i++) { if (i == 100) errAbort("Error: Andy didn't make the array for having isoscizomers big enough\n"); newone->scizs[i] = cloneString(items[i]); } } else newone->numSciz = 0; slAddHead(&enzList, newone); } slReverse(&enzList); lineFileClose(&lf); return enzList; }
static void rFindMulti(struct bptFile *bpt, bits64 blockStart, void *key, struct slRef **pList)
/* Recursively collect every value stored under key, starting at the block at
 * file offset blockStart, and add a clone of each to pList.  The caller
 * should do a slRefFreeListAndVals() on the list when done. */
{
/* Go to the block and read its header: leaf flag, reserved byte, child count. */
udcSeek(bpt->udc, blockStart);
UBYTE isLeaf;
UBYTE reserved;
udcMustReadOne(bpt->udc, isLeaf);
udcMustReadOne(bpt->udc, reserved);
boolean isSwapped = bpt->isSwapped;
bits16 childCount = udcReadBits16(bpt->udc, isSwapped);
bits16 childIx;

int keySize = bpt->keySize;
UBYTE keyBuf[keySize];		/* Current key, buffered on stack. */
UBYTE valBuf[bpt->valSize];	/* Current value, buffered on stack. */

if (isLeaf)
    {
    /* Leaf: scan every key/value pair and keep the values of exact matches. */
    for (childIx=0; childIx<childCount; ++childIx)
	{
	udcMustRead(bpt->udc, keyBuf, keySize);
	udcMustRead(bpt->udc, valBuf, bpt->valSize);
	if (memcmp(key, keyBuf, keySize) != 0)
	    continue;
	refAdd(pList, cloneMem(valBuf, bpt->valSize));
	}
    return;
    }

/* Index block: read first key and first file offset. */
udcMustRead(bpt->udc, keyBuf, keySize);
bits64 prevOffset = udcReadBits64(bpt->udc, isSwapped);
bits64 curOffset = prevOffset;
int prevCmp = memcmp(key, keyBuf, keySize);

/* Walk the remaining children.  The previous child is descended into when
 * key lies between its first key and the current child's first key. */
for (childIx=1; childIx<childCount; ++childIx)
    {
    udcMustRead(bpt->udc, keyBuf, keySize);
    curOffset = udcReadBits64(bpt->udc, isSwapped);
    int cmp = memcmp(key, keyBuf, keySize);
    if (prevCmp >= 0 && cmp <= 0)
	{
	/* The recursion moves the file position, so restore it afterwards. */
	bits64 resumePos = udcTell(bpt->udc);
	rFindMulti(bpt, prevOffset, key, pList);
	udcSeek(bpt->udc, resumePos);
	}
    if (cmp < 0)
	return;
    prevCmp = cmp;
    prevOffset = curOffset;
    }

/* If made it all the way to end, do last one too. */
rFindMulti(bpt, curOffset, key, pList);
}
static void ffShNeedle(FILE *f, DNA *needle, int needleSize,
	int needleNumOffset, char *colorFlags,
	struct ffAli *aliList, boolean upcMatch,
	int cdsS, int cdsE,
	boolean accentRange, int accentStart, int accentEnd)
/* Write the needle sequence to f as HTML with highlighting.  Bases that
 * match the haystack in an aligned block get a color flag (orange/red while
 * in UTR, bright blue/blue otherwise; the first and last base of a block get
 * the orange/bright blue variant) and are upper-cased if upcMatch is set.
 * With accentRange set, aligned positions in accentStart..accentEnd are
 * accented and an anchor is written at accentStart.
 * colorFlags is caller-supplied scratch of at least needleSize bytes. */
{
struct cfm *cfm = cfmNew(10, 50, TRUE, FALSE, f, needleNumOffset);
char *shown = cloneMem(needle, needleSize);	/* Private copy we may upper-case. */
char *accentFlags = needMem(needleSize);
struct ffAli *block = aliList;
long i;

zeroBytes(colorFlags, needleSize);
zeroBytes(accentFlags, needleSize);
fprintf(f, "<PRE><TT>\n");

/* Rewind to the leftmost block. */
while (block != NULL && block->left != NULL)
    block = block->left;

for ( ; block != NULL; block = block->right)
    {
    int off = block->nStart-needle;
    int count = block->nEnd - block->nStart;
    boolean utr = ((cdsE > 0) && ((cdsS-off-1) > 0));
    for (i=0; i<count; ++i)
	{
	long pos = off+i;
	boolean atEdge = (i == 0 || i == count-1);
	/* The two updates are applied in this order on every base. */
	if (!utr && (i > (cdsE-off-1)) && (cdsE > 0))
	    utr = TRUE;
	if (utr && (i == (cdsS-off)))
	    utr = FALSE;
	if (toupper(block->hStart[i]) == toupper(block->nStart[i]))
	    {
	    if (utr)
		colorFlags[pos] = (atEdge ? socOrange : socRed);
	    else
		colorFlags[pos] = (atEdge ? socBrightBlue : socBlue);
	    if (upcMatch)
		shown[pos] = toupper(shown[pos]);
	    }
	if (accentRange && pos >= accentStart && pos < accentEnd)
	    accentFlags[pos] = TRUE;
	}
    }

for (i=0; i<needleSize; ++i)
    {
    if (accentRange && i == accentStart)
	fprintf(f, "<A NAME=cDNAStart></A>");
    cfmOutExt(cfm, shown[i], seqOutColorLookup[(int)colorFlags[i]],
	      accentFlags[i], accentFlags[i], FALSE);
    }
cfmFree(&cfm);
freeMem(shown);
freeMem(accentFlags);

fprintf(f, "</TT></PRE>\n");
htmHorizontalLine(f);
}
static struct blastBlock *nextBlock(struct blastFile *bf, struct blastQuery *bq,
                                    struct blastGappedAli *bga, boolean *skipRet)
/* Read in next blast block.  Return NULL at EOF or end of gapped
 * alignment.  If an unparsable block is found, set skipRet to TRUE and return
 * NULL.
 * Strands are always stored as +1 or -1; the coordinate reversal at the end
 * of this function tests for a negative strand. */
{
struct blastBlock *bb;
char *line;
char *words[16];
int wordCount;
char *parts[3];
int partCount;
static struct dyString *qString = NULL, *tString = NULL;

verbose(TRACE_LEVEL,  "blastFileNextBlock\n");
*skipRet = FALSE;

/* Seek until get something like:
 *   Score = 8770 bits (4424), Expect = 0.0
 * or something that looks like we're done with this gapped
 * alignment. */
for (;;)
    {
    if (!nextBlockLine(bf, bq, &line))
	return NULL;
    if (startsWith(" Score", line))
	break;
    }
AllocVar(bb);
bb->gappedAli = bga;
wordCount = chopLine(line, words);
if (wordCount < 8 || !sameWord("Score", words[0])
    || !isdigit(words[2][0])
    || !(isdigit(words[7][0]) || words[7][0] == 'e')
    || !startsWith("Expect", words[5]))
    {
    bfError(bf, "Expecting something like:\n"
             "Score = 8770 bits (4424), Expect = 0.0");
    }
bb->bitScore = atof(words[2]);
bb->eVal = evalToDouble(words[7]);

/* Process something like:
 *   Identities = 8320/9618 (86%), Gaps = 3/9618 (0%)
 *             or
 *   Identities = 8320/9618 (86%)
 *             or
 *   Identities = 10/19 (52%), Positives = 15/19 (78%), Frame = +2
 *     (wu-tblastn)
 *             or
 *   Identities = 256/400 (64%), Positives = 306/400 (76%)
 *   Frame = +1 / -2
 *     (tblastn)
 *
 *   Identities = 1317/10108 (13%), Positives = 2779/10108 (27%), Gaps = 1040/10108
 *   (10%)
 *      - wrap on long lines
 *
 * Handle weird cases where there is only a `Score' line, with no `Identities'
 * lines by skipping the alignment; they seem like small, junky alignments.
 */
line = bfNeedNextLine(bf);
wordCount = chopLine(line, words);
if (wordCount < 3 || !sameWord("Identities", words[0]))
    {
    if (wordCount > 1 || sameWord("Score", words[0]))
	{
	/* ugly hack to skip block with no identities */
	*skipRet = TRUE;
	blastBlockFree(&bb);
	return NULL;
	}
    bfError(bf, "Expecting identity count");
    }
partCount = chopByChar(words[2], '/', parts, ArraySize(parts));
if (partCount != 2 || !isdigit(parts[0][0]) || !isdigit(parts[1][0]))
    bfSyntax(bf);
bb->matchCount = atoi(parts[0]);
bb->totalCount = atoi(parts[1]);
if (wordCount >= 7 && sameWord("Gaps", words[4]))
    {
    if (!isdigit(words[6][0]))
	bfSyntax(bf);
    bb->insertCount = atoi(words[6]);
    }
if ((wordCount >= 11) && sameWord("Frame", words[8]))
    {
    /* wu-tblastn puts the frame on the Identities line.  Record the strands
     * as +1/-1 like the other branches.  The characters '+' and '-' are both
     * positive, so the reversal below never saw a minus-strand target. */
    bb->qStrand = 1;
    bb->tStrand = (words[10][0] == '-') ? -1 : 1;
    bb->tFrame = atoi(words[10]);
    }

line = bfNeedNextLine(bf);
boolean wrapped = (startsWith("(", line));

/* Process something like:
 *     Strand = Plus / Plus (blastn)
 *     Frame = +1           (tblastn)
 *     Frame = +1 / -2      (tblastx)
 *     <blank line>         (blastp)
 * note that wu-tblastn puts frame on Identities line
 */
if (wrapped)
    line = bfNeedNextLine(bf);
wordCount = chopLine(line, words);
if ((wordCount >= 5) && sameWord("Strand", words[0]))
    {
    bb->qStrand = getStrand(bf, words[2]);
    bb->tStrand = getStrand(bf, words[4]);
    }
else if ((wordCount >= 5) && sameWord("Frame", words[0]) && (words[3][0] == '/'))
    {
    // Frame = +1 / -2      (tblastx)
    bb->qStrand = (words[2][0] == '-') ? -1 : 1;
    bb->tStrand = (words[4][0] == '-') ? -1 : 1;
    bb->qFrame = atoi(words[2]);
    bb->tFrame = atoi(words[4]);
    }
else if ((wordCount >= 3) && sameWord("Frame", words[0]))
    {
    // Frame = +1           (tblastn)
    bb->qStrand = 1;
    bb->tStrand = (words[2][0] == '-') ? -1 : 1;
    bb->qFrame = atoi(words[2]);
    bb->tFrame = 1;
    }
else if (wordCount == 0)
    {
    /* if we didn't parse frame, default it */
    if (bb->qStrand == 0)
	{
	bb->qStrand = 1;
	bb->tStrand = 1;
	}
    }
else
    bfError(bf, "Expecting Strand, Frame or blank line");

/* Process alignment lines.  They come in groups of three
 * separated by a blank line - something like:
 * Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488
 *              |||||| |||||||||| ||| ||||||||||||||||||||||| || || ||||||||
 * Sbjct: 62966 taccttaacattcctcaatgtttcatcatcgttctctcctccaaatggtgaaagtccgga 63025
 */
if (qString == NULL)
    {
    /* Buffers are allocated once and reused by every later call. */
    qString = newDyString(50000);
    tString = newDyString(50000);
    }
clearBlastBlock(bb, qString, tString);
for (;;)
    {
    if (!findBlockSeqPair(bf, bq))
	break;
    parseBlockSeqPair(bf, bb, qString, tString);
    }

/* convert to [0..n) and move to strand coords if necessary */
bb->qStart--;
if (bb->qStrand < 0)
    reverseIntRange(&bb->qStart, &bb->qEnd, bq->queryBaseCount);
bb->tStart--;
if (bb->tStrand < 0)
    reverseIntRange(&bb->tStart, &bb->tEnd, bga->targetSize);
bb->qSym = cloneMem(qString->string, qString->stringSize+1);
bb->tSym = cloneMem(tString->string, tString->stringSize+1);
return bb;
}
struct gapCalc *gapCalcRead(struct lineFile *lf) /* Create gapCalc from open file. */ { int i, tableSize, startLong = -1; struct gapCalc *gapCalc; int *gapInitPos; double *gapInitQGap; double *gapInitTGap; double *gapInitBothGap; AllocVar(gapCalc); /* Parse file. */ readTaggedNumLine(lf, "tableSize", 1, &tableSize, NULL); readTaggedNumLine(lf, "smallSize", 1, &gapCalc->smallSize, NULL); AllocArray(gapInitPos,tableSize); AllocArray(gapInitQGap,tableSize); AllocArray(gapInitTGap,tableSize); AllocArray(gapInitBothGap,tableSize); readTaggedNumLine(lf, "position", tableSize, gapInitPos, NULL); readTaggedNumLine(lf, "qGap", tableSize, NULL, gapInitQGap); readTaggedNumLine(lf, "tGap", tableSize, NULL, gapInitTGap); readTaggedNumLine(lf, "bothGap", tableSize, NULL, gapInitBothGap); /* Set up precomputed interpolations for small gaps. */ AllocArray(gapCalc->qSmall, gapCalc->smallSize); AllocArray(gapCalc->tSmall, gapCalc->smallSize); AllocArray(gapCalc->bSmall, gapCalc->smallSize); for (i=1; i<gapCalc->smallSize; ++i) { gapCalc->qSmall[i] = interpolate(i, gapInitPos, gapInitQGap, tableSize); gapCalc->tSmall[i] = interpolate(i, gapInitPos, gapInitTGap, tableSize); gapCalc->bSmall[i] = interpolate(i, gapInitPos, gapInitBothGap, tableSize); } /* Set up to handle intermediate values. 
*/ for (i=0; i<tableSize; ++i) { if (gapCalc->smallSize == gapInitPos[i]) { startLong = i; break; } } if (startLong < 0) errAbort("No position %d in gapCalcRead()\n", gapCalc->smallSize); gapCalc->longCount = tableSize - startLong; gapCalc->qPosCount = tableSize - startLong; gapCalc->tPosCount = tableSize - startLong; gapCalc->bPosCount = tableSize - startLong; gapCalc->longPos = cloneMem(gapInitPos + startLong, gapCalc->longCount * sizeof(int)); gapCalc->qLong = cloneMem(gapInitQGap + startLong, gapCalc->qPosCount * sizeof(double)); gapCalc->tLong = cloneMem(gapInitTGap + startLong, gapCalc->tPosCount * sizeof(double)); gapCalc->bLong = cloneMem(gapInitBothGap + startLong, gapCalc->bPosCount * sizeof(double)); /* Set up to handle huge values. */ gapCalc->qLastPos = gapCalc->longPos[gapCalc->qPosCount-1]; gapCalc->tLastPos = gapCalc->longPos[gapCalc->tPosCount-1]; gapCalc->bLastPos = gapCalc->longPos[gapCalc->bPosCount-1]; gapCalc->qLastPosVal = gapCalc->qLong[gapCalc->qPosCount-1]; gapCalc->tLastPosVal = gapCalc->tLong[gapCalc->tPosCount-1]; gapCalc->bLastPosVal = gapCalc->bLong[gapCalc->bPosCount-1]; gapCalc->qLastSlope = calcSlope(gapCalc->qLastPosVal, gapCalc->qLong[gapCalc->qPosCount-2], gapCalc->qLastPos, gapCalc->longPos[gapCalc->qPosCount-2]); gapCalc->tLastSlope = calcSlope(gapCalc->tLastPosVal, gapCalc->tLong[gapCalc->tPosCount-2], gapCalc->tLastPos, gapCalc->longPos[gapCalc->tPosCount-2]); gapCalc->bLastSlope = calcSlope(gapCalc->bLastPosVal, gapCalc->bLong[gapCalc->bPosCount-2], gapCalc->bLastPos, gapCalc->longPos[gapCalc->bPosCount-2]); freez(&gapInitPos); freez(&gapInitQGap); freez(&gapInitTGap); freez(&gapInitBothGap); return gapCalc; }
int ffShAliPart(FILE *f, struct ffAli *aliList,
    char *needleName, DNA *needle, int needleSize, int needleNumOffset,
    char *haystackName, DNA *haystack, int haySize, int hayNumOffset,
    int blockMaxGap,
    boolean rcNeedle, boolean rcHaystack,
    boolean showJumpTable,
    boolean showNeedle, boolean showHaystack,
    boolean showSideBySide, boolean upcMatch,
    int cdsS, int cdsE, int hayPartS, int hayPartE)
/* Display parts of alignment on html page.  If hayPartS..hayPartE is a
 * smaller subrange of the alignment, highlight that part of the alignment
 * in both needle and haystack with underline & bold, and show only that
 * part of the haystack (plus padding).  Returns number of blocks (after
 * merging blocks separated by blockMaxGap or less).
 * The needle is reverse-complemented in place while displayed when rcNeedle
 * is set, and restored before returning.  An empty aliList is tolerated. */
{
long i;
struct ffAli *ali;
struct ffAli *lastAli;
struct ffAli *leftAli = aliList;
struct ffAli *rightAli = aliList;
int maxSize = (needleSize > haySize ? needleSize : haySize);
char *colorFlags = needMem(maxSize);
int anchorCount = 0;
boolean restrictToWindow = FALSE;
int hayOffStart = 0, hayOffEnd = haySize;
int hayPaddedOffStart = 0, hayPaddedOffEnd = haySize;
int hayExtremity = rcHaystack ? (hayNumOffset + haySize) : hayNumOffset;
int nPartS=0, nPartE=0;

if (aliList != NULL)
    {
    while (leftAli->left != NULL) leftAli = leftAli->left;
    while (rightAli->right != NULL) rightAli = rightAli->right;
    }

/* If we are only showing part of the alignment, translate haystack window
 * coords to needle window coords and haystack-offset window coords.
 * With no alignment there is nothing to restrict, and leftAli/rightAli are
 * NULL, so the test must not dereference them. */
if (aliList != NULL &&
    (hayPartS > (hayNumOffset + (leftAli->hStart - haystack)) ||
     (hayPartE > 0 && hayPartE < (hayNumOffset + (rightAli->hEnd - haystack)))))
    {
    DNA *haystackPartS;
    DNA *haystackPartE;
    restrictToWindow = TRUE;
    if (rcHaystack)
	{
	haystackPartS = haystack + (haySize - (hayPartE - hayNumOffset));
	haystackPartE = haystack + (haySize - (hayPartS - hayNumOffset));
	}
    else
	{
	haystackPartS = haystack + hayPartS - hayNumOffset;
	haystackPartE = haystack + hayPartE - hayNumOffset;
	}
    boolean foundStart = FALSE;
    hayOffStart = haystackPartS - haystack;
    hayOffEnd = haystackPartE - haystack;
    for (ali = leftAli;  ali != NULL;  ali = ali->right)
	{
	/* The first block ending after the window start fixes the start. */
	if (haystackPartS < ali->hEnd && !foundStart)
	    {
	    int offset = haystackPartS - ali->hStart;
	    if (offset < 0)
		offset = 0;
	    nPartS = offset + ali->nStart - needle;
	    hayOffStart = offset + ali->hStart - haystack;
	    foundStart = TRUE;
	    }
	/* The last block starting before the window end fixes the end. */
	if (haystackPartE > ali->hStart)
	    {
	    if (haystackPartE > ali->hEnd)
		{
		nPartE = ali->nEnd - needle;
		hayOffEnd = ali->hEnd - haystack;
		}
	    else
		{
		nPartE = haystackPartE - ali->hStart + ali->nStart - needle;
		hayOffEnd = haystackPartE - haystack;
		}
	    }
	}
    hayPaddedOffStart = max(0, (hayOffStart - 100));
    hayPaddedOffEnd = min(haySize, (hayOffEnd + 100));
    if (rcHaystack)
	hayExtremity = hayNumOffset + haySize - hayPaddedOffStart;
    else
	hayExtremity = hayNumOffset + hayPaddedOffStart;
    }

if (showJumpTable)
    {
    fputs("<CENTER><P><TABLE BORDER=1 WIDTH=\"97%\"><TR>", f);
    fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNA\">cDNA Sequence</A></TD>", f);
    if (restrictToWindow)
	fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNAStart\">cDNA Sequence in window</A></TD>", f);
    fputs("<TD WIDTH=\"27%\"><P ALIGN=\"CENTER\"><A HREF=\"#genomic\">Genomic Sequence</A></TD>", f);
    fputs("<TD WIDTH=\"29%\"><P ALIGN=\"CENTER\"><A HREF=\"#1\">cDNA in Genomic</A></TD>", f);
    fputs("<TD WIDTH=\"21%\"><P ALIGN=\"CENTER\"><A HREF=\"#ali\">Side by Side</A></TD>", f);
    fputs("</TR></TABLE>\n", f);
    }
if (cdsE > 0)
    {
    fprintf(f, "Matching bases in coding regions of cDNA and genomic sequences are colored blue%s. ",
	    (upcMatch ? " and capitalized" : ""));
    fprintf(f, "Matching bases in UTR regions of cDNA and genomic sequences are colored red%s. ",
	    (upcMatch ? " and capitalized" : ""));
    fputs("Light blue (coding) or orange (UTR) bases mark the boundaries of gaps in either sequence "
	  "(often splice sites).\n", f);
    }
else
    {
    fprintf(f, "Matching bases in cDNA and genomic sequences are colored blue%s. ",
	    (upcMatch ? " and capitalized" : ""));
    fputs("Light blue bases mark the boundaries of gaps in either sequence "
	  "(often splice sites).\n", f);
    }
if (showNeedle && restrictToWindow)
    fputs("Bases that were in the selected browser region are shown in bold "
	  "and underlined, "
	  "and only the alignment for these bases is displayed in the "
	  "Genomic and Side by Side sections.\n", f);

if (showJumpTable)
    fputs("</P></CENTER>\n", f);
htmHorizontalLine(f);

fprintf(f, "<H4><A NAME=cDNA></A>cDNA %s%s</H4>\n", needleName, (rcNeedle ? " (reverse complemented)" : ""));

/* Display is done on the reverse complement; this is undone before return. */
if (rcNeedle)
    reverseComplement(needle, needleSize);

if (showNeedle)
    {
    ffShNeedle(f, needle, needleSize, needleNumOffset, colorFlags,
	       aliList, upcMatch, cdsS, cdsE,
	       restrictToWindow, nPartS, nPartE);
    }

if (showHaystack)
    {
    struct cfm *cfm = cfmNew(10, 50, TRUE, rcHaystack, f, hayExtremity);
    char *h = cloneMem(haystack, haySize);
    char *accentFlags = needMem(haySize);
    zeroBytes(accentFlags, haySize);
    fprintf(f, "<H4><A NAME=genomic></A>Genomic %s %s:</H4>\n",
	    haystackName, (rcHaystack ? "(reverse strand)" : ""));
    fprintf(f, "<PRE><TT>\n");
    zeroBytes(colorFlags, haySize);
    for (ali = leftAli; ali != NULL; ali = ali->right)
	{
	boolean utr = FALSE;
	int i;
	int off = ali->hStart-haystack;
	int count = ali->hEnd - ali->hStart;
	int offn = ali->nStart-needle;
	if ((cdsE > 0) && ((cdsS-offn-1) > 0))
	    utr = TRUE;
	for (i=0; i<count; ++i)
	    {
	    if (!utr && (i > (cdsE-offn-1)) && (cdsE > 0))
		utr = TRUE;
	    if (utr && (i == (cdsS-offn)))
		utr = FALSE;
	    if (toupper(ali->hStart[i]) == toupper(ali->nStart[i]))
		{
		if (utr)
		    colorFlags[off+i] = ((i == 0 || i == count-1) ? socOrange : socRed);
		else
		    colorFlags[off+i] = ((i == 0 || i == count-1) ? socBrightBlue : socBlue);
		if (upcMatch)
		    h[off+i] = toupper(h[off+i]);
		}
	    if (restrictToWindow && off+i >= hayOffStart && off+i < hayOffEnd)
		accentFlags[off+i] = TRUE;
	    }
	}
    /* Skip blocks that end before the displayed (padded) window. */
    ali = leftAli;
    lastAli = NULL;
    while (ali && (ali->hEnd - haystack) <= hayPaddedOffStart)
	ali = ali->right;
    for (i = hayPaddedOffStart; i < hayPaddedOffEnd; ++i)
	{
	/* Put down "anchor" on first match position in haystack
	 * so user can hop here with a click on the needle. */
	if (ali != NULL &&  i == ali->hStart - haystack)
	    {
	    if (lastAli == NULL || ali->hStart - lastAli->hEnd > blockMaxGap)
		{
		fprintf(f, "<A NAME=%d></A>", ++anchorCount);
		}
	    lastAli = ali;
	    ali = ali->right;
	    }
	cfmOutExt(cfm, h[i], seqOutColorLookup[(int)colorFlags[i]],
		  accentFlags[i], accentFlags[i], FALSE);
	}
    cfmFree(&cfm);
    freeMem(h);
    /* Previously leaked. */
    freeMem(accentFlags);
    fprintf(f, "</TT></PRE>\n");
    htmHorizontalLine(f);
    }

if (showSideBySide)
    {
    fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment</H4>\n");
    ffShowSideBySide(f, leftAli, needle, needleNumOffset, haystack, hayNumOffset, haySize,
		     hayOffStart, hayOffEnd, blockMaxGap, rcHaystack, TRUE);
    fprintf(f, "<HR ALIGN=\"CENTER\">");
    fprintf(f, "<EM>*Aligned Blocks with gaps <= %d bases are merged for "
	    "this display when only one sequence has a gap, or when gaps in "
	    "both sequences are of the same size.</EM>\n", blockMaxGap);
    }
if (rcNeedle)
    reverseComplement(needle, needleSize);
/* Scratch color buffer was never released before. */
freeMem(colorFlags);
return anchorCount;
}
void initGapAid(char *gapFileName) /* Initialize gap aid structure for faster gap * computations. */ { int i, tableSize, startLong = -1; char *sizeDesc[2]; char *words[128]; if (gapFileName != NULL) { struct lineFile *lf = lineFileOpen(gapFileName, TRUE); int count; lineFileNextRowTab(lf, sizeDesc, 2); tableSize = atoi(sizeDesc[1]); AllocArray(gapInitPos,tableSize); AllocArray(gapInitQGap,tableSize); AllocArray(gapInitTGap,tableSize); AllocArray(gapInitBothGap,tableSize); while (count = lineFileChopNext(lf, words, tableSize+1)) { if (sameString(words[0],"smallSize")) { aid.smallSize = atoi(words[1]); } if (sameString(words[0],"position")) { for (i=0 ; i<count-1 ; i++) gapInitPos[i] = atoi(words[i+1]); } if (sameString(words[0],"qGap")) { for (i=0 ; i<count-1 ; i++) gapInitQGap[i] = atoi(words[i+1]); } if (sameString(words[0],"tGap")) { for (i=0 ; i<count-1 ; i++) gapInitTGap[i] = atoi(words[i+1]); } if (sameString(words[0],"bothGap")) { for (i=0 ; i<count-1 ; i++) gapInitBothGap[i] = atoi(words[i+1]); } } if (aid.smallSize == 0) errAbort("missing smallSize parameter in %s\n",gapFileName); lineFileClose(&lf); } else { /* if no gap file, then setup default values */ /* Set up to handle small values */ aid.smallSize = 111; tableSize = 11; AllocArray(gapInitPos,tableSize); AllocArray(gapInitQGap,tableSize); AllocArray(gapInitTGap,tableSize); AllocArray(gapInitBothGap,tableSize); for (i = 0 ; i < tableSize ; i++) { gapInitPos[i] = gapInitPosDefault[i]; gapInitTGap[i] = gapInitTGapDefault[i]; gapInitQGap[i] = gapInitQGapDefault[i]; gapInitBothGap[i] = gapInitBothGapDefault[i]; } } AllocArray(aid.qSmall, aid.smallSize); AllocArray(aid.tSmall, aid.smallSize); AllocArray(aid.bSmall, aid.smallSize); for (i=1; i<aid.smallSize; ++i) { aid.qSmall[i] = interpolate(i, gapInitPos, gapInitQGap, tableSize); aid.tSmall[i] = interpolate(i, gapInitPos, gapInitTGap, tableSize); aid.bSmall[i] = interpolate(i, gapInitPos, gapInitBothGap, tableSize); } /* Set up to handle intermediate 
values. */ for (i=0; i<tableSize; ++i) { if (aid.smallSize == gapInitPos[i]) { startLong = i; break; } } if (startLong < 0) errAbort("No position %d in initGapAid()\n", aid.smallSize); aid.longCount = tableSize - startLong; aid.qPosCount = tableSize - startLong; aid.tPosCount = tableSize - startLong; aid.bPosCount = tableSize - startLong; aid.longPos = cloneMem(gapInitPos + startLong, aid.longCount * sizeof(int)); aid.qLong = cloneMem(gapInitQGap + startLong, aid.qPosCount * sizeof(double)); aid.tLong = cloneMem(gapInitTGap + startLong, aid.tPosCount * sizeof(double)); aid.bLong = cloneMem(gapInitBothGap + startLong, aid.bPosCount * sizeof(double)); /* Set up to handle huge values. */ aid.qLastPos = aid.longPos[aid.qPosCount-1]; aid.tLastPos = aid.longPos[aid.tPosCount-1]; aid.bLastPos = aid.longPos[aid.bPosCount-1]; aid.qLastPosVal = aid.qLong[aid.qPosCount-1]; aid.tLastPosVal = aid.tLong[aid.tPosCount-1]; aid.bLastPosVal = aid.bLong[aid.bPosCount-1]; aid.qLastSlope = calcSlope(aid.qLastPosVal, aid.qLong[aid.qPosCount-2], aid.qLastPos, aid.longPos[aid.qPosCount-2]); aid.tLastSlope = calcSlope(aid.tLastPosVal, aid.tLong[aid.tPosCount-2], aid.tLastPos, aid.longPos[aid.tPosCount-2]); aid.bLastSlope = calcSlope(aid.bLastPosVal, aid.bLong[aid.bPosCount-2], aid.bLastPos, aid.longPos[aid.bPosCount-2]); // uglyf("qLastPos %d, qlastPosVal %f, qLastSlope %f\n", aid.qLastPos, aid.qLastPosVal, aid.qLastSlope); // uglyf("tLastPos %d, tlastPosVal %f, tLastSlope %f\n", aid.tLastPos, aid.tLastPosVal, aid.tLastSlope); // uglyf("bLastPos %d, blastPosVal %f, bLastSlope %f\n", aid.bLastPos, aid.bLastPosVal, aid.bLastSlope); }
void outputBlocks(struct lineFile *lf, struct block *blockList, int score, FILE *f, boolean isRc, char *qName, int qSize, char *qNibDir, struct dlList *qCache, char *tName, int tSize, char *tNibDir, struct dlList *tCache, boolean rescore) /* Output block list as an axt to file f. */ { int qStart = BIGNUM, qEnd = 0, tStart = BIGNUM, tEnd = 0; struct block *lastBlock = NULL; struct block *block; struct dyString *qSym = newDyString(16*1024); struct dyString *tSym = newDyString(16*1024); struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seq = NULL; struct axt axt; boolean qIsTwoBit = twoBitIsFile(qNibDir); boolean tIsTwoBit = twoBitIsFile(tNibDir); if (blockList == NULL) return; /* Figure overall dimensions. */ for (block = blockList; block != NULL; block = block->next) { if (qStart > block->qStart) qStart = block->qStart; if (qEnd < block->qEnd) qEnd = block->qEnd; if (tStart > block->tStart) tStart = block->tStart; if (tEnd < block->tEnd) tEnd = block->tEnd; } /* Load sequence covering alignment from nib files. 
*/ if (isRc) { reverseIntRange(&qStart, &qEnd, qSize); if (qIsFa) { for (seq = qFaList ; seq != NULL ; seq = seq->next) if (sameString(qName, seq->name)) break; if (seq != NULL) { AllocVar(qSeq); qSeq->size = qEnd - qStart; qSeq->name = cloneString(qName); qSeq->dna = cloneMem((seq->dna)+qStart, qSeq->size); } else errAbort("sequence not found %s\n",qName); } else qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit); reverseIntRange(&qStart, &qEnd, qSize); reverseComplement(qSeq->dna, qSeq->size); } else { if (qIsFa) { for (seq = qFaList ; seq != NULL ; seq = seq->next) { if (sameString(qName, seq->name)) break; } if (seq != NULL) { AllocVar(qSeq); qSeq->size = qEnd - qStart; qSeq->name = cloneString(qName); qSeq->dna = (seq->dna)+qStart; } else errAbort("sequence not found %s\n",qName); } else qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit); } if (tIsFa) { for (seq = tFaList ; seq != NULL ; seq = seq->next) if (sameString(tName, seq->name)) break; if (seq != NULL) { AllocVar(tSeq); tSeq->size = tEnd - tStart; tSeq->name = cloneString(tName); tSeq->dna = cloneMem((seq->dna)+tStart, tSeq->size); } else errAbort("sequence not found %s\n",tName); } else tSeq = readFromCache(tCache, tNibDir, tName, tStart, tEnd - tStart, tSize, tIsTwoBit); /* Loop through blocks copying sequence into dynamic strings. 
*/ for (block = blockList; block != NULL; block = block->next) { if (lastBlock != NULL) { int qGap = block->qStart - lastBlock->qEnd; int tGap = block->tStart - lastBlock->tEnd; if (qGap != 0 && tGap != 0) { errAbort("Gaps in both strand on alignment ending line %d of %s", lf->lineIx, lf->fileName); } if (qGap > 0) { dyStringAppendMultiC(tSym, '-', qGap); dyStringAppendN(qSym, qSeq->dna + lastBlock->qEnd - qStart, qGap); } if (tGap > 0) { dyStringAppendMultiC(qSym, '-', tGap); dyStringAppendN(tSym, tSeq->dna + lastBlock->tEnd - tStart, tGap); } } if (qSeq->size < block->qStart - qStart) { errAbort("read past end of sequence %s size =%d block->qStart-qstart=%d block->qStart=%d qEnd=%d \n", qName, qSeq->size, block->qStart-qStart,block->qStart, block->qEnd ); } dyStringAppendN(qSym, qSeq->dna + block->qStart - qStart, block->qEnd - block->qStart); if (tSeq->size < block->tStart - tStart) { errAbort("read past end of sequence %s size =%d block->tStart-tstart=%d\n", tName, tSeq->size, block->tStart-tStart); } dyStringAppendN(tSym, tSeq->dna + block->tStart - tStart, block->tEnd - block->tStart); lastBlock = block; } if (qSym->stringSize != tSym->stringSize) errAbort("qSize and tSize don't agree in alignment ending line %d of %s", lf->lineIx, lf->fileName); if (rescore) score = axtScoreSym(scoreScheme, qSym->stringSize, qSym->string, tSym->string); /* Fill in an axt and write it to output. */ ZeroVar(&axt); axt.qName = qName; axt.qStart = qStart; axt.qEnd = qEnd; axt.qStrand = (isRc ? '-' : '+'); axt.tName = tName; axt.tStart = tStart; axt.tEnd = tEnd; axt.tStrand = '+'; axt.score = score; axt.symCount = qSym->stringSize; axt.qSym = qSym->string; axt.tSym = tSym->string; axtWrite(&axt, f); /* Clean up. */ if (!qIsFa) freeDnaSeq(&qSeq); freeDnaSeq(&tSeq); dyStringFree(&qSym); dyStringFree(&tSym); }