Exemple #1
0
struct axt *createAxtGap(char *nibFile, char *chrom, 	
			 int start, int end, char strand)
/* Return a newly allocated axt covering chrom:start-end in which the target
 * symbols are the bases loaded from nibFile and the query symbols are all
 * '-' (a "null" alignment where the whole query is deleted).
 * tName is set to the caller's chrom pointer and qName to a string literal;
 * neither is copied here. */
{
struct axt *axt;
int size = end-start;
char *gapPt = needLargeMem(size+1);
char *p;
struct dnaSeq *seq = NULL;

/* Write exactly size gap characters, then terminate.  The buffer is
 * size+1 bytes and is used as the query symbol string, so the last byte
 * must be the terminator rather than another '-'. */
for (p=gapPt; p<gapPt+size; p++)
    *p = '-';
*p = 0;

AllocVar(axt);
axt->tName = chrom;
axt->tStart = start;
axt->tEnd = end;
axt->tStrand = strand;
axt->qName = "gap";
axt->qStart = 1;
axt->qEnd = size;
axt->qStrand = strand;
axt->symCount = size;
axt->score = 0;
seq = nibLoadPart(nibFile, start,size);
axt->tSym = cloneMem(seq->dna, size+1);
/* Hand the gap buffer itself to the axt instead of cloning it and then
 * dropping the original on the floor. */
axt->qSym = gapPt;
/* NOTE(review): seq is not released here; confirm whether the caller or a
 * dnaSeq free routine should reclaim it. */
return axt;
}
Exemple #2
0
struct bed *bedFromGenePred(struct genePred *genePred)
/* Build a newly allocated bed from one genePred.  The transcript range
 * becomes chromStart/chromEnd, the CDS range becomes thickStart/thickEnd and
 * every exon becomes one block.  Aborts when the transcript range is empty
 * or the CDS range runs backwards. */
{
struct bed *result;
int exonIx, exonCount, txStart;
int *relStarts, *sizes;

/* Minimal sanity check on the incoming record. */
if (genePred->txStart >= genePred->txEnd || genePred->cdsStart > genePred->cdsEnd)
    {
    errAbort("mangled genePred format for %s", genePred->name);
    }

/* Allocate the bed and copy over the scalar fields. */
AllocVar(result);
result->chrom = cloneString(genePred->chrom);
txStart = genePred->txStart;
result->chromStart = txStart;
result->chromEnd = genePred->txEnd;
result->thickStart = genePred->cdsStart;
result->thickEnd = genePred->cdsEnd;
result->score = 0;
strncpy(result->strand,  genePred->strand, sizeof(result->strand));
exonCount = genePred->exonCount;
result->blockCount = exonCount;

/* Start from copies of the absolute exon ends and starts ... */
sizes = (int *)cloneMem(genePred->exonEnds,(sizeof(int)*genePred->exonCount));
result->blockSizes = sizes;
relStarts = (int *)cloneMem(genePred->exonStarts, (sizeof(int)*genePred->exonCount));
result->chromStarts = relStarts;
result->name = cloneString(genePred->name);

/* ... then turn ends into sizes and starts into offsets from txStart. */
for (exonIx = 0; exonIx < exonCount; ++exonIx)
    {
    int absStart = relStarts[exonIx];
    sizes[exonIx] = sizes[exonIx] - absStart;
    relStarts[exonIx] = absStart - txStart;
    }
return result;
}
Exemple #3
0
bam1_t *bamClone(const bam1_t *bam)
/* Duplicate bam: the struct itself and the buffer its data field points to
 * are each copied into fresh memory, and the copy is returned. */
{
    /* The casts drop const so the pointers can be passed to cloneMem
     * without compiler complaints. */
    size_t dataBytes = bam->data_len*sizeof(bam->data[0]);
    bam1_t *copy = cloneMem((void *)bam, sizeof(*bam));
    copy->data = cloneMem((void *)bam->data, dataBytes);
    return copy;
}
Exemple #4
0
static void *cloneValues(void *valuesIn, enum asTypes type)
/* Copy valuesIn in a way that depends on type: two doubles for floating
 * types, two long longs for integer types, and a string clone for anything
 * else.  A NULL valuesIn yields NULL. */
{
if (valuesIn == NULL)
    return NULL;
if (asTypesIsFloating(type))
    return cloneMem(valuesIn, 2*sizeof(double));
if (asTypesIsInt(type))
    return cloneMem(valuesIn, 2*sizeof(long long));
return cloneString((char *)valuesIn);
}
Exemple #5
0
static struct dnaSeq *faReadAllMixableInLf(struct lineFile *lf, 
	boolean isDna, boolean mixed)
/* Read every remaining record from the open fa file lf and return them as a
 * list in file order.  When mixed is set the mixed-case reader is used and
 * isDna is ignored; otherwise isDna is passed to the reader (per the
 * original note: DNA comes back lower case, non-DNA upper case).
 * Each returned dnaSeq owns copies of its name and sequence. */
{
struct dnaSeq *list = NULL;
DNA *bases;
char *seqName;
int baseCount;

while (mixed ? faMixedSpeedReadNext(lf, &bases, &baseCount, &seqName)
	     : faSomeSpeedReadNext(lf, &bases, &baseCount, &seqName, isDna))
    {
    struct dnaSeq *el;
    AllocVar(el);
    el->name = cloneString(seqName);
    el->size = baseCount;
    /* Copy size+1 bytes so the byte after the sequence comes along too. */
    el->dna = cloneMem(bases, baseCount+1);
    slAddHead(&list, el);
    }
/* Records were pushed on the head, so flip to restore file order. */
slReverse(&list);
faFreeFastBuf();
return list;
}
struct bed *pslToBed(struct psl *psl)
/* Convert a psl to a newly allocated bed on the target sequence, very
 * similar to customTrack.c::customTrackPsl.
 * The bed spans tStart..tEnd (thick range identical), is named after the
 * query, carries one block per psl block and a score of
 * 1000 - 2*milliBad clamped at zero.  Block starts are stored relative to
 * chromStart.  Aborts if the psl ranges are inconsistent. */
{
struct bed *bed;
int i, blockCount, *chromStarts, chromStart;

/* A tiny bit of error checking on the psl. */
if (psl->qStart >= psl->qEnd || psl->qEnd > psl->qSize 
    || psl->tStart >= psl->tEnd || psl->tEnd > psl->tSize)
    {
    errAbort("mangled psl format for %s", psl->qName);
    }

/* Allocate bed and fill in from psl. */
AllocVar(bed);
bed->chrom = cloneString(psl->tName);
bed->chromStart = bed->thickStart =  chromStart = psl->tStart;
bed->chromEnd = bed->thickEnd = psl->tEnd;
bed->score = 1000 - 2*pslCalcMilliBad(psl, TRUE);
if (bed->score < 0) bed->score = 0;
/* strncpy does not terminate when the source fills the destination
 * (e.g. a two-character strand), so terminate explicitly. */
strncpy(bed->strand,  psl->strand, sizeof(bed->strand));
bed->strand[sizeof(bed->strand)-1] = 0;
bed->blockCount = blockCount = psl->blockCount;
bed->blockSizes = (int *)cloneMem(psl->blockSizes,(sizeof(int)*psl->blockCount));
bed->chromStarts = chromStarts = (int *)cloneMem(psl->tStarts, (sizeof(int)*psl->blockCount));
bed->name = cloneString(psl->qName);

/* Switch minus target strand to plus strand.  A block occupying
 * [s, s+size) on the reverse strand occupies
 * [chromSize-s-size, chromSize-s) on the forward strand, so the block size
 * has to be subtracted as well to land on the block's start rather than
 * its end.  Both arrays are reversed first so index i refers to the same
 * block in each. */
if (psl->strand[1] == '-')
    {
    int chromSize = psl->tSize;
    reverseInts(bed->blockSizes, blockCount);
    reverseInts(chromStarts, blockCount);
    for (i=0; i<blockCount; ++i)
	chromStarts[i] = chromSize - (chromStarts[i] + bed->blockSizes[i]);
    }

/* Convert coordinates to relative. */
for (i=0; i<blockCount; ++i)
    chromStarts[i] -= chromStart;
return bed;
}
Exemple #7
0
struct annoRow *annoRowWigNew(char *chrom, uint start, uint end, boolean rightJoinFail,
			      float *values)
/* Make a new annoRow for wiggle data spanning chrom:start-end.  The chrom
 * string is cloned and (end-start) elements of values are copied into the
 * row's data field. */
{
size_t valueBytes = (end - start) * sizeof(values[0]);
struct annoRow *wigRow;

AllocVar(wigRow);
wigRow->rightJoinFail = rightJoinFail;
wigRow->start = start;
wigRow->end = end;
wigRow->chrom = cloneString(chrom);
wigRow->data = cloneMem(values, valueBytes);
return wigRow;
}
struct qaSeq *qaReadNext(struct lineFile *lf)
/* Read in next record in .qa file. */
{
struct qaSeq *qa, seq;

if (!qaFastReadNext(lf, &seq.qa, &seq.size, &seq.name))
    return NULL;
AllocVar(qa);
qa->name = cloneString(seq.name);
qa->size = seq.size;
qa->qa = cloneMem(seq.qa, seq.size+1);
return qa;
}
static void fillInQa(char *qaName, struct hash *hash, struct qaSeq *qaList)
/* Hash contains qaSeq's with DNA sequence but no
 * quality info.  Fill in quality info from .qa file. */
{    
struct lineFile *lf = lineFileOpen(qaName, TRUE);
struct qaSeq seq;

while (qaFastReadNext(lf, &seq.qa, &seq.size, &seq.name))
    {
    seq.qa = cloneMem(seq.qa, seq.size+1);
    attatchQaInfo(hash, seq.name, seq.qa, seq.size);
    }
lineFileClose(&lf);
checkAllPresent(qaList);
}
static struct qaSeq *qaFaRead(char *qaName, char *faName, boolean mustReadQa)
/* Read sequences from faName and, when qaName is given, their quality
 * values from qaName (either .qa or .qac format).  Returns the list of
 * qaSeq in fa file order.  Duplicate sequence names are warned about and
 * only the first is kept.  If the quality file is missing and mustReadQa is
 * false, fake quality values are made instead. */
{
struct hash *byName = newHash(0);
struct qaSeq *list = NULL, *el;
struct qaSeq rec;
FILE *faFile = mustOpen(faName, "r");

/* Pass 1: load all sequences, indexing them by name. */
while (faFastReadNext(faFile, &rec.dna, &rec.size, &rec.name))
    {
    if (hashLookup(byName, rec.name) == NULL)
	{
	AllocVar(el);
	hashAdd(byName, rec.name, el);
	el->name = cloneString(rec.name);
	el->dna = cloneMem(rec.dna, rec.size+1);
	el->size = rec.size;
	slAddHead(&list, el);
	}
    else
	{
	warn("Duplicate %s, ignoring all but first.", rec.name);
	}
    }
fclose(faFile);

/* Pass 2: bring in quality values, or fake them if the file is absent
 * and the caller does not insist on it. */
if (qaName != NULL)
    {
    boolean fakeIt = (!mustReadQa && !fileExists(qaName));
    if (fakeIt)
	{
	warn("No quality file %s", qaName);
	for (el = list; el != NULL; el = el->next)
	    qaMakeFake(el);
	}
    else if (isQacFile(qaName))
	fillInQac(qaName, byName, list);
    else
	fillInQa(qaName, byName, list);
    }
freeHash(&byName);
slReverse(&list);
return list;
}
Exemple #11
0
struct hash *loadChroms(char *dir)
/* Load every file in dir matching "*.<faExtn>" into memory.
 * Returns a hash keyed by chromosome name (derived from the file name)
 * whose values are dnaSeq structures holding an upper-cased private copy of
 * the first sequence read from each file.  Aborts if a file cannot be
 * opened or read, or if no file matches. */
{
FILE *f;
char fastaScan[16];
safef(fastaScan, sizeof(fastaScan), "*.%s", faExtn);
struct fileInfo *chromEl, *chromList = listDirX(dir, fastaScan, TRUE);
struct hash *chromHash = newHash(0);
struct dnaSeq *seq;
char chrom[128];
char *faName;
int count = 0;

verbose(2, "#    scanning '%s/%s'\n", dir, fastaScan);
for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
    {
    char *fileName = chromEl->name;
    /* Chromosome name is the file's base name without its suffix. */
    splitPath(fileName, NULL, chrom, NULL);
    chopSuffix(chrom);
    if (startsWith("chr0", chrom)) /* Convert chr01 to chr1, etc. */
	stripChar(chrom, '0');
    if (sameString(chrom, "chrmt"))
        strcpy(chrom, "chr17");
    f = fopen(fileName, "r");
    /* Fail with a clear message rather than handing a NULL stream to the
     * fasta reader. */
    if (f == NULL)
        errAbort("Couldn't open %s", fileName);
    AllocVar(seq);
    seq->name = cloneString(chrom);
    if (!faFastReadNext(f, &seq->dna, &seq->size, &faName))
        errAbort("Couldn't load sequence from %s", fileName);
    /* Take a private copy of the bases before the next read. */
    seq->dna = cloneMem(seq->dna, seq->size+1);
    toUpperN(seq->dna, seq->size);
    hashAdd(chromHash, chrom, seq);
    verbose(3, "#    loadChrom %s '%s'\n", fileName, chrom);
    fclose(f);
    f = NULL;
    count++;
    }
if (0 == count)
    errAbort("not fasta files found in '%s/%s'\n", dir, fastaScan);
return chromHash;
}
Exemple #12
0
char *cloneLongString(char *s)
/* Return a freshly allocated copy of s, terminator included.  The length
 * is carried as a size_t all the way into cloneMem. */
{
return cloneMem(s, strlen(s) + 1);
}
struct cutter *readGcg(char *gcgFile)
/* Parse a GCG file and load it into cutter format. */
{
struct lineFile *lf = lineFileOpen(gcgFile,TRUE);
struct cutter *enzList = NULL;
char *line = "whatever", *words[10], numWords;

/* Skip to the right line. */
while (lineFileNext(lf,&line,NULL) && !startsWith("..",line));
/* */
while ((numWords=lineFileChop(lf,words)))
    {
    struct cutter *newone = NULL;
    int comIx = (numWords==7) ? 5 : 6;
    int refIx = (numWords==7) ? 6 : 7;
    int i;
    char *items[100];

    /* Skip ones */
    if (words[4][0] == '?')
	continue;
    AllocVar(newone);
    newone->semicolon = (words[0][0] == ';') ? TRUE : FALSE;
    /* Deal with the first few columns */
    if (!isdigit(words[1][0]))
	errAbort("Error: expecting a number in cut site column on line %d\n", lf->lineIx+1);
    if (!isdigit(words[3][0]) && words[3][0]!='-')
	errAbort("Error: expecting a number in the overhang column on line %d\n", lf->lineIx+1);
    if (words[comIx][0] != '>')
	errAbort("Error: expecting a \'>\' in the commercial sources column of line %d\n", lf->lineIx+1);
    newone->name = (words[0][0] == ';') ? cloneString(words[0]+1) : cloneString(words[0]);
    newone->cut = atoi(words[1]);
    newone->seq = cloneString(words[2]);
    touppers(newone->seq);
    stripChar(newone->seq,'\'');
    stripChar(newone->seq,'_');
    newone->size = strlen(newone->seq);
    newone->matchSize = newone->size - countChars(newone->seq, 'N');
    newone->palindromic = isPalindrome(newone->seq);
    newone->overhang = atoi(words[3]);
    newone->numCompanies = strlen(words[comIx]+1);
    if (newone->numCompanies > 0)
	newone->companies = cloneMem(words[comIx]+1, newone->numCompanies*sizeof(char));
    newone->numRefs = chopString(words[refIx], ",", items, ArraySize(items));
    AllocArray(newone->refs, newone->numRefs);
    for (i = 0; i < newone->numRefs; i++) 
	{
	if (i == 100)
	    errAbort("Error: Andy didn't make the array for holding references big enough\n");
	if (!isdigit(items[i][0]))
	    errAbort("Error: expecting number in references column in line %d\n", lf->lineIx+1);
	newone->refs[i] = atoi(items[i]);
	}
    /* Deal with isoscizomers. */
    if (numWords == 8)
	{
	newone->numSciz = chopString(words[5], ",", items, ArraySize(items));
	AllocArray(newone->scizs, newone->numSciz*sizeof(int));
	for (i = 0; i < newone->numSciz; i++)
	    {
	    if (i == 100)
		errAbort("Error: Andy didn't make the array for having isoscizomers big enough\n");
	    newone->scizs[i] = cloneString(items[i]);
	    }
	}
    else 
	newone->numSciz = 0;
    slAddHead(&enzList, newone);
    }
slReverse(&enzList);
lineFileClose(&lf);
return enzList;
}
Exemple #14
0
static void rFindMulti(struct bptFile *bpt, bits64 blockStart, void *key, struct slRef **pList)
/* Find values corresponding to key and add them to pList.  You'll need to 
 * Do a slRefFreeListAndVals() on the list when done.
 *   bpt        - open index; supplies the stream, key size, value size and
 *                byte-swap flag
 *   blockStart - file offset of the block to examine
 *   key        - keySize bytes compared with memcmp against stored keys
 *   pList      - list that receives a reference to a copy of each match
 * Recurses into child blocks, restoring the stream position after each
 * recursive call made from inside the scan loop. */
{
/* Seek to start of block. */
udcSeek(bpt->udc, blockStart);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(bpt->udc, isLeaf);
/* reserved is read only to advance past it; its value is not used. */
udcMustReadOne(bpt->udc, reserved);
boolean isSwapped = bpt->isSwapped;
childCount = udcReadBits16(bpt->udc, isSwapped);

int keySize = bpt->keySize;
UBYTE keyBuf[keySize];   /* Place to put a key, buffered on stack. */
UBYTE valBuf[bpt->valSize];   /* Place to put a value, buffered on stack. */

if (isLeaf)
    {
    /* Leaf: children are (key, value) pairs.  Every pair is read; values
     * whose key matches are copied and added to the list. */
    for (i=0; i<childCount; ++i)
        {
	udcMustRead(bpt->udc, keyBuf, keySize);
	udcMustRead(bpt->udc, valBuf, bpt->valSize);
	if (memcmp(key, keyBuf, keySize) == 0)
	    {
	    void *val = cloneMem(valBuf, bpt->valSize);
	    refAdd(pList, val);
	    }
	}
    }
else
    {
    /* Read first key and first file offset. */
    udcMustRead(bpt->udc, keyBuf, keySize);
    bits64 lastFileOffset = udcReadBits64(bpt->udc, isSwapped);
    bits64 fileOffset = lastFileOffset;
    int lastCmp = memcmp(key, keyBuf, keySize);

    /* Loop through remainder. */
    for (i=1; i<childCount; ++i)
	{
	udcMustRead(bpt->udc, keyBuf, keySize);
	fileOffset = udcReadBits64(bpt->udc, isSwapped);
	int cmp = memcmp(key, keyBuf, keySize);
	/* key is >= the previous child's key and <= this child's key, so
	 * the previous child may hold matches. */
	if (lastCmp >= 0 && cmp <= 0)
	    {
	    /* The recursive call seeks elsewhere; remember where we are. */
	    bits64 curPos = udcTell(bpt->udc);
	    rFindMulti(bpt, lastFileOffset, key, pList);
	    udcSeek(bpt->udc, curPos);
	    }
	/* key sorts before this child's key: stop scanning this block. */
	if (cmp < 0)
	    return;
	lastCmp = cmp;
	lastFileOffset = fileOffset;
	}
    /* If made it all the way to end, do last one too. */
    rFindMulti(bpt, fileOffset, key, pList);
    }
}
Exemple #15
0
static void ffShNeedle(FILE *f, DNA *needle, int needleSize,
		       int needleNumOffset, char *colorFlags,
		       struct ffAli *aliList, boolean upcMatch,
		       int cdsS, int cdsE,
		       boolean accentRange, int accentStart, int accentEnd)
/* Write the needle sequence to f as HTML inside <PRE><TT>.
 * Aligned bases that match the haystack (case-insensitively) get a color:
 * red/orange when flagged as UTR, blue/bright blue otherwise, with the
 * second color of each pair used on the first and last base of a block.
 * Matches are upper-cased when upcMatch is set.  When accentRange is set,
 * aligned bases at offsets in [accentStart, accentEnd) are accented and a
 * cDNAStart anchor is emitted at accentStart.
 * colorFlags is caller-supplied scratch of at least needleSize bytes; it is
 * cleared and rewritten here. */
{
struct cfm *out = cfmNew(10, 50, TRUE, FALSE, f, needleNumOffset);
char *shown = cloneMem(needle, needleSize);
char *accent = needMem(needleSize);
struct ffAli *first = aliList;
struct ffAli *blk;
long ix;

zeroBytes(colorFlags, needleSize);
zeroBytes(accent, needleSize);
fprintf(f, "<PRE><TT>\n");

/* Rewind to the leftmost block of the alignment list. */
while (first != NULL && first->left != NULL)
    first = first->left;

for (blk = first; blk != NULL; blk = blk->right)
    {
    int off = blk->nStart-needle;
    int count = blk->nEnd - blk->nStart;
    /* Block starts in UTR when a CDS is defined and begins more than one
     * base past the block start. */
    boolean utr = ((cdsE > 0) && ((cdsS-off-1) > 0));
    for (ix=0; ix<count; ++ix)
	{
	long at = off+ix;
	if (!utr && (ix > (cdsE-off-1)) && (cdsE > 0))
	    utr = TRUE;
	if (utr && (ix == (cdsS-off)))
	    utr = FALSE;
	if (toupper(blk->hStart[ix]) == toupper(blk->nStart[ix]))
	    {
	    boolean edge = (ix == 0 || ix == count-1);
	    if (utr)
		colorFlags[at] = (edge ? socOrange : socRed);
	    else
		colorFlags[at] = (edge ? socBrightBlue : socBlue);
	    if (upcMatch)
		shown[at] = toupper(shown[at]);
	    }
	if (accentRange && at >= accentStart && at < accentEnd)
	    accent[at] = TRUE;
	}
    }

/* Emit the whole needle, one base at a time, with its flags. */
for (ix=0; ix<needleSize; ++ix)
    {
    if (accentRange && ix == accentStart)
	fprintf(f, "<A NAME=cDNAStart></A>");
    cfmOutExt(out, shown[ix], seqOutColorLookup[(int)colorFlags[ix]],
	      accent[ix], accent[ix], FALSE);
    }
cfmFree(&out);
freeMem(shown);
freeMem(accent);
fprintf(f, "</TT></PRE>\n");
htmHorizontalLine(f);
}
Exemple #16
0
static struct blastBlock *nextBlock(struct blastFile *bf, struct blastQuery *bq,
                                    struct blastGappedAli *bga, boolean *skipRet)
/* Read in next blast block.  Return NULL at EOF or end of gapped
 * alignment. If an unparsable block is found, set skipRet to TRUE and return
 * NULL.
 *   bf      - blast file being parsed; lines are pulled from it
 *   bq      - current query; its queryBaseCount is used to flip query
 *             coordinates on the reverse strand
 *   bga     - gapped alignment the block belongs to; stored in the block
 *             and its targetSize is used to flip target coordinates
 *   skipRet - always set: FALSE on entry, TRUE only for the skip case
 * The aligned query and target text is accumulated in two function-static
 * dyStrings that are created on first use and reused on every call; the
 * returned block receives its own copies. */
{
struct blastBlock *bb;
char *line;
char *words[16];
int wordCount;
char *parts[3];
int partCount;
static struct dyString *qString = NULL, *tString = NULL;

verbose(TRACE_LEVEL,  "blastFileNextBlock\n");
*skipRet = FALSE;

/* Seek until get something like:
 *   Score = 8770 bits (4424), Expect = 0.0
 * or something that looks like we're done with this gapped
 * alignment. */
for (;;)
    {
    if (!nextBlockLine(bf, bq, &line))
	return NULL;
    if (startsWith(" Score", line))
	break;
    }
AllocVar(bb);
bb->gappedAli = bga;
wordCount = chopLine(line, words);
/* NOTE(review): the code below continues after bfError(), so it presumably
 * does not return - confirm. */
if (wordCount < 8 || !sameWord("Score", words[0]) 
    || !isdigit(words[2][0]) || !(isdigit(words[7][0]) || words[7][0] == 'e')
    || !startsWith("Expect", words[5]))
    {
    bfError(bf, "Expecting something like:\n"
             "Score = 8770 bits (4424), Expect = 0.0");
    }
bb->bitScore = atof(words[2]);
bb->eVal = evalToDouble(words[7]);

/* Process something like:
 *   Identities = 8320/9618 (86%), Gaps = 3/9618 (0%)
 *             or
 *   Identities = 8320/9618 (86%)
 *             or
 *   Identities = 10/19 (52%), Positives = 15/19 (78%), Frame = +2
 *     (wu-tblastn)
 *             or
 *   Identities = 256/400 (64%), Positives = 306/400 (76%)
 *   Frame = +1 / -2
 *     (tblastn)
 *
 *   Identities = 1317/10108 (13%), Positives = 2779/10108 (27%), Gaps = 1040/10108
 *   (10%)
 *      - wrap on long lines
 *
 * Handle weird cases where there is only a `Score' line, with no `Identities'
 * lines by skipping the alignment; they seem like small, junky alignments.
 */
line = bfNeedNextLine(bf);
wordCount = chopLine(line, words);
if (wordCount < 3 || !sameWord("Identities", words[0]))
    {
    /* NOTE(review): when wordCount is 0 the test below still reads
     * words[0]; confirm chopLine leaves it in a usable state. */
    if (wordCount > 1 || sameWord("Score", words[0]))
        {
        /* ugly hack to skip block with no identities */
        *skipRet = TRUE;
        blastBlockFree(&bb);
        return NULL;
        }
    bfError(bf, "Expecting identity count");
    }
/* words[2] looks like "8320/9618": matches / total. */
partCount = chopByChar(words[2], '/', parts, ArraySize(parts));
if (partCount != 2 || !isdigit(parts[0][0]) || !isdigit(parts[1][0]))
    bfSyntax(bf);
bb->matchCount = atoi(parts[0]);
bb->totalCount = atoi(parts[1]);
/* Gaps in the fifth word position: atoi stops at the '/', so only the
 * leading count of "3/9618" is kept. */
if (wordCount >= 7 && sameWord("Gaps", words[4]))
    {
    if (!isdigit(words[6][0]))
	bfSyntax(bf);
    bb->insertCount = atoi(words[6]);
    }
/* Frame given on the Identities line itself (wu-tblastn layout above). */
if ((wordCount >= 11) && sameWord("Frame", words[8]))
    {
    bb->qStrand = '+';
    bb->tStrand = words[10][0];
    bb->tFrame = atoi(words[10]);
    }

line = bfNeedNextLine(bf);
/* A line starting with '(' is the wrapped tail of a long Identities line. */
boolean wrapped = (startsWith("(", line));

/* Process something like:
 *     Strand = Plus / Plus (blastn)
 *     Frame = +1           (tblastn)
 *     Frame = +1 / -2      (tblastx)
 *     <blank line>         (blastp)
 * note that wu-tblastn puts frame on Identities line
 */
if (wrapped)
    line = bfNeedNextLine(bf);
wordCount = chopLine(line, words);
/* NOTE(review): strands are stored as characters ('+') in some branches and
 * as +1/-1 in others; the later `< 0' tests only catch the -1 form.
 * Confirm this mix is intended. */
if ((wordCount >= 5) && sameWord("Strand", words[0]))
    {
    bb->qStrand = getStrand(bf, words[2]);
    bb->tStrand = getStrand(bf, words[4]);
    }
else if ((wordCount >= 5) && sameWord("Frame", words[0]) && (words[3][0] == '/'))
    {
    // Frame = +1 / -2      (tblastx)
    bb->qStrand = (words[2][0] == '-') ? -1 : 1;
    bb->tStrand = (words[4][0] == '-') ? -1 : 1;
    bb->qFrame = atoi(words[2]);
    bb->tFrame = atoi(words[4]);
    }
else if ((wordCount >= 3) && sameWord("Frame", words[0]))
    {
    // Frame = +1           (tblastn)
    bb->qStrand = 1;
    bb->tStrand = (words[2][0] == '-') ? -1 : 1;
    bb->qFrame = atoi(words[2]);
    bb->tFrame = 1;
    }
else if (wordCount == 0)
    {
    /* if we didn't parse frame, default it */
    if (bb->qStrand == 0)
        {
        bb->qStrand = '+';
        bb->tStrand = '+';
        }
    }
else
    bfError(bf, "Expecting Strand, Frame or blank line");


/* Process alignment lines.  They come in groups of three
 * separated by a blank line - something like:
 * Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488
 *              |||||| |||||||||| ||| ||||||||||||||||||||||| || || ||||||||
 * Sbjct: 62966 taccttaacattcctcaatgtttcatcatcgttctctcctccaaatggtgaaagtccgga 63025
 */
if (qString == NULL)
    {
    /* First call: create the reusable accumulation buffers. */
    qString = newDyString(50000);
    tString = newDyString(50000);
    }
clearBlastBlock(bb, qString, tString);
for (;;)
    {
    if (!findBlockSeqPair(bf, bq))
        break;
    parseBlockSeqPair(bf, bb, qString, tString);
    }

/* convert to [0..n) and move to strand coords if necessary */
bb->qStart--;
if (bb->qStrand < 0)
    reverseIntRange(&bb->qStart, &bb->qEnd, bq->queryBaseCount);
bb->tStart--;
if (bb->tStrand < 0)
    reverseIntRange(&bb->tStart, &bb->tEnd, bga->targetSize);
/* Give the block its own copies (terminator included) since the static
 * buffers are reused by the next call. */
bb->qSym = cloneMem(qString->string, qString->stringSize+1);
bb->tSym = cloneMem(tString->string, tString->stringSize+1);
return bb;
}
Exemple #17
0
struct gapCalc *gapCalcRead(struct lineFile *lf)
/* Create gapCalc from open file. */
{
int i, tableSize, startLong = -1;
struct gapCalc *gapCalc;
int *gapInitPos;  
double *gapInitQGap;  
double *gapInitTGap;  
double *gapInitBothGap;

AllocVar(gapCalc);

/* Parse file. */
readTaggedNumLine(lf, "tableSize", 1, &tableSize, NULL);
readTaggedNumLine(lf, "smallSize", 1, &gapCalc->smallSize, NULL);
AllocArray(gapInitPos,tableSize);
AllocArray(gapInitQGap,tableSize);
AllocArray(gapInitTGap,tableSize);
AllocArray(gapInitBothGap,tableSize);
readTaggedNumLine(lf, "position", tableSize, gapInitPos, NULL);
readTaggedNumLine(lf, "qGap", tableSize, NULL, gapInitQGap);
readTaggedNumLine(lf, "tGap", tableSize, NULL, gapInitTGap);
readTaggedNumLine(lf, "bothGap", tableSize, NULL, gapInitBothGap);

/* Set up precomputed interpolations for small gaps. */
AllocArray(gapCalc->qSmall, gapCalc->smallSize);
AllocArray(gapCalc->tSmall, gapCalc->smallSize);
AllocArray(gapCalc->bSmall, gapCalc->smallSize);
for (i=1; i<gapCalc->smallSize; ++i)
    {
    gapCalc->qSmall[i] = 
	interpolate(i, gapInitPos, gapInitQGap, tableSize);
    gapCalc->tSmall[i] = 
	interpolate(i, gapInitPos, gapInitTGap, tableSize);
    gapCalc->bSmall[i] = interpolate(i, gapInitPos, 
	gapInitBothGap, tableSize);
    }

/* Set up to handle intermediate values. */
for (i=0; i<tableSize; ++i)
    {
    if (gapCalc->smallSize == gapInitPos[i])
	{
	startLong = i;
	break;
	}
    }
if (startLong < 0)
    errAbort("No position %d in gapCalcRead()\n", gapCalc->smallSize);
gapCalc->longCount = tableSize - startLong;
gapCalc->qPosCount = tableSize - startLong;
gapCalc->tPosCount = tableSize - startLong;
gapCalc->bPosCount = tableSize - startLong;
gapCalc->longPos = cloneMem(gapInitPos + startLong, gapCalc->longCount * sizeof(int));
gapCalc->qLong = cloneMem(gapInitQGap + startLong, gapCalc->qPosCount * sizeof(double));
gapCalc->tLong = cloneMem(gapInitTGap + startLong, gapCalc->tPosCount * sizeof(double));
gapCalc->bLong = cloneMem(gapInitBothGap + startLong, gapCalc->bPosCount * sizeof(double));

/* Set up to handle huge values. */
gapCalc->qLastPos = gapCalc->longPos[gapCalc->qPosCount-1];
gapCalc->tLastPos = gapCalc->longPos[gapCalc->tPosCount-1];
gapCalc->bLastPos = gapCalc->longPos[gapCalc->bPosCount-1];
gapCalc->qLastPosVal = gapCalc->qLong[gapCalc->qPosCount-1];
gapCalc->tLastPosVal = gapCalc->tLong[gapCalc->tPosCount-1];
gapCalc->bLastPosVal = gapCalc->bLong[gapCalc->bPosCount-1];
gapCalc->qLastSlope = calcSlope(gapCalc->qLastPosVal, gapCalc->qLong[gapCalc->qPosCount-2],
			   gapCalc->qLastPos, gapCalc->longPos[gapCalc->qPosCount-2]);
gapCalc->tLastSlope = calcSlope(gapCalc->tLastPosVal, gapCalc->tLong[gapCalc->tPosCount-2],
			   gapCalc->tLastPos, gapCalc->longPos[gapCalc->tPosCount-2]);
gapCalc->bLastSlope = calcSlope(gapCalc->bLastPosVal, gapCalc->bLong[gapCalc->bPosCount-2],
			   gapCalc->bLastPos, gapCalc->longPos[gapCalc->bPosCount-2]);
freez(&gapInitPos);
freez(&gapInitQGap);
freez(&gapInitTGap);
freez(&gapInitBothGap);
return gapCalc;
}
Exemple #18
0
int ffShAliPart(FILE *f, struct ffAli *aliList, 
    char *needleName, DNA *needle, int needleSize, int needleNumOffset,
    char *haystackName, DNA *haystack, int haySize, int hayNumOffset,
    int blockMaxGap, boolean rcNeedle, boolean rcHaystack,
    boolean showJumpTable, 
    boolean showNeedle, boolean showHaystack,
    boolean showSideBySide, boolean upcMatch,
    int cdsS, int cdsE, int hayPartS, int hayPartE)
/* Display parts of alignment on html page.  If hayPartS..hayPartE is a 
 * smaller subrange of the alignment, highlight that part of the alignment 
 * in both needle and haystack with underline & bold, and show only that 
 * part of the haystack (plus padding).  Returns number of blocks (after
 * merging blocks separated by blockMaxGap or less).
 * Sections written to f, each optional: a jump table, the needle, the
 * haystack, and the side-by-side view.  When rcNeedle is set the needle is
 * reverse complemented in place for the duration of the call and restored
 * before returning.  An empty aliList is tolerated: no window restriction
 * is attempted and no bases are colored. */
{
long i;
struct ffAli *ali;
struct ffAli *lastAli;
struct ffAli *leftAli = aliList;
struct ffAli *rightAli = aliList;
int maxSize = (needleSize > haySize ? needleSize : haySize);
/* Scratch color flags shared by the needle and haystack displays. */
char *colorFlags = needMem(maxSize);
int anchorCount = 0;
boolean restrictToWindow = FALSE;
int hayOffStart = 0, hayOffEnd = haySize;
int hayPaddedOffStart = 0, hayPaddedOffEnd = haySize;
int hayExtremity = rcHaystack ? (hayNumOffset + haySize) : hayNumOffset;
int nPartS=0, nPartE=0;

if (aliList != NULL)
    {
    while (leftAli->left != NULL) leftAli = leftAli->left;
    while (rightAli->right != NULL) rightAli = rightAli->right;
    }

/* If we are only showing part of the alignment, translate haystack window
 * coords to needle window coords and haystack-offset window coords.
 * With no alignment blocks there is nothing to restrict (and leftAli and
 * rightAli are NULL), so the test is skipped. */
if (aliList != NULL &&
    (hayPartS > (hayNumOffset + (leftAli->hStart - haystack)) ||
     (hayPartE > 0 && hayPartE < (hayNumOffset + (rightAli->hEnd - haystack)))))
    {
    DNA *haystackPartS;
    DNA *haystackPartE;
    restrictToWindow = TRUE;
    if (rcHaystack)
	{
	haystackPartS = haystack + (haySize - (hayPartE - hayNumOffset));
	haystackPartE = haystack + (haySize - (hayPartS - hayNumOffset));
	}
    else
	{
	haystackPartS = haystack + hayPartS - hayNumOffset;
	haystackPartE = haystack + hayPartE - hayNumOffset;
	}
    boolean foundStart = FALSE;
    hayOffStart = haystackPartS - haystack;
    hayOffEnd = haystackPartE - haystack;
    for (ali = leftAli;  ali != NULL;  ali = ali->right)
	{
	/* First block that ends after the window start fixes the start. */
	if (haystackPartS < ali->hEnd && !foundStart)
	    {
	    int offset = haystackPartS - ali->hStart;
	    if (offset < 0)
		offset = 0;
	    nPartS = offset + ali->nStart - needle;
	    hayOffStart = offset + ali->hStart - haystack;
	    foundStart = TRUE;
	    }
	/* Every block starting before the window end pushes the end out;
	 * the last such block wins. */
	if (haystackPartE > ali->hStart)
	    {
	    if (haystackPartE > ali->hEnd)
		{
		nPartE = ali->nEnd - needle;
		hayOffEnd = ali->hEnd - haystack;
		}
	    else
		{
		nPartE = haystackPartE - ali->hStart + ali->nStart - needle;
		hayOffEnd = haystackPartE - haystack;
		}
	    }
	}
    /* Show up to 100 bases of context on each side of the window. */
    hayPaddedOffStart = max(0, (hayOffStart - 100));
    hayPaddedOffEnd = min(haySize, (hayOffEnd + 100));
    if (rcHaystack)
	hayExtremity = hayNumOffset + haySize - hayPaddedOffStart;
    else
	hayExtremity = hayNumOffset + hayPaddedOffStart;
    }

if (showJumpTable)
    {
    fputs("<CENTER><P><TABLE BORDER=1 WIDTH=\"97%\"><TR>", f);
    fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNA\">cDNA Sequence</A></TD>", f);
    if (restrictToWindow)
	fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNAStart\">cDNA Sequence in window</A></TD>", f);
    fputs("<TD WIDTH=\"27%\"><P ALIGN=\"CENTER\"><A HREF=\"#genomic\">Genomic Sequence</A></TD>", f);
    fputs("<TD WIDTH=\"29%\"><P ALIGN=\"CENTER\"><A HREF=\"#1\">cDNA in Genomic</A></TD>", f);
    fputs("<TD WIDTH=\"21%\"><P ALIGN=\"CENTER\"><A HREF=\"#ali\">Side by Side</A></TD>", f);
    fputs("</TR></TABLE>\n", f);
    }
if (cdsE > 0) 
    {
    fprintf(f, "Matching bases in coding regions of cDNA and genomic sequences are colored blue%s. ", 
	    (upcMatch ? " and capitalized" : ""));
    fprintf(f, "Matching bases in UTR regions of cDNA and genomic sequences are colored red%s. ", 
	    (upcMatch ? " and capitalized" : ""));
    fputs("Light blue (coding) or orange (UTR) bases mark the boundaries of gaps in either sequence "
	  "(often splice sites).\n", f);
    } 
else 
    {
    fprintf(f, "Matching bases in cDNA and genomic sequences are colored blue%s. ", 
	    (upcMatch ? " and capitalized" : ""));
    fputs("Light blue bases mark the boundaries of gaps in either sequence "
	  "(often splice sites).\n", f);
    } 
if (showNeedle && restrictToWindow)
    fputs("Bases that were in the selected browser region are shown in bold "
	  "and underlined, "
	  "and only the alignment for these bases is displayed in the "
	  "Genomic and Side by Side sections.\n", f);

if (showJumpTable)
    fputs("</P></CENTER>\n", f);
htmHorizontalLine(f);

fprintf(f, "<H4><A NAME=cDNA></A>cDNA %s%s</H4>\n", needleName, (rcNeedle ? " (reverse complemented)" : ""));

/* Flip the needle in place for display; it is flipped back before return. */
if (rcNeedle)
    reverseComplement(needle, needleSize);

if (showNeedle)
    {
    ffShNeedle(f, needle, needleSize, needleNumOffset, colorFlags,
	       aliList, upcMatch, cdsS, cdsE,
	       restrictToWindow, nPartS, nPartE);
    }

if (showHaystack)
    {
    struct cfm *cfm = cfmNew(10, 50, TRUE, rcHaystack, f, hayExtremity);
    char *h = cloneMem(haystack, haySize);
    char *accentFlags = needMem(haySize);
    zeroBytes(accentFlags, haySize);
    fprintf(f, "<H4><A NAME=genomic></A>Genomic %s %s:</H4>\n", 
    	haystackName,
	(rcHaystack ? "(reverse strand)" : ""));
    fprintf(f, "<PRE><TT>\n");
    zeroBytes(colorFlags, haySize);
    /* Color matching bases of every block, same scheme as the needle. */
    for (ali = leftAli; ali != NULL; ali = ali->right)
	{
	boolean utr = FALSE;
	int i;
	int off = ali->hStart-haystack;
	int count = ali->hEnd - ali->hStart;
	int offn = ali->nStart-needle;
	if ((cdsE > 0) && ((cdsS-offn-1) > 0)) 
	    utr = TRUE;
	for (i=0; i<count; ++i)
	    {
	    if (!utr && (i > (cdsE-offn-1)) && (cdsE > 0))
		utr = TRUE;
	    if (utr && (i == (cdsS-offn)))
		utr = FALSE;
	    if (toupper(ali->hStart[i]) == toupper(ali->nStart[i]))
		{
		if (utr)
		    colorFlags[off+i] = ((i == 0 || i == count-1) ? socOrange : socRed);
		else
		    colorFlags[off+i] = ((i == 0 || i == count-1) ? socBrightBlue : socBlue);
		if (upcMatch)
		    h[off+i] = toupper(h[off+i]);
		}
	    if (restrictToWindow && off+i >= hayOffStart && off+i < hayOffEnd)
		accentFlags[off+i] = TRUE;
	    }
	}
    /* Skip blocks that end before the displayed (padded) range. */
    ali = leftAli;
    lastAli = NULL;
    while (ali && (ali->hEnd - haystack) <= hayPaddedOffStart)
	ali = ali->right;
    for (i = hayPaddedOffStart; i < hayPaddedOffEnd; ++i)
	{
	/* Put down "anchor" on first match position in haystack
	 * so user can hop here with a click on the needle. */
	if (ali != NULL &&  i == ali->hStart - haystack)
	    {
	    if (lastAli == NULL || ali->hStart - lastAli->hEnd > blockMaxGap)
		{
		fprintf(f, "<A NAME=%d></A>", ++anchorCount);
		}
	    lastAli = ali;
	    ali = ali->right;
	    }
	cfmOutExt(cfm, h[i], seqOutColorLookup[(int)colorFlags[i]],
		  accentFlags[i], accentFlags[i], FALSE);
	}
    cfmFree(&cfm);
    freeMem(h);
    /* accentFlags is local to this section; release it with h. */
    freeMem(accentFlags);
    fprintf(f, "</TT></PRE>\n");
    htmHorizontalLine(f);
    }

if (showSideBySide)
    {
    fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment</H4>\n");
    ffShowSideBySide(f, leftAli, needle, needleNumOffset, haystack, hayNumOffset, haySize,
		     hayOffStart, hayOffEnd, blockMaxGap, rcHaystack, TRUE);
    fprintf(f, "<HR ALIGN=\"CENTER\">");
    fprintf(f, "<EM>*Aligned Blocks with gaps &lt;= %d bases are merged for "
	    "this display when only one sequence has a gap, or when gaps in "
	    "both sequences are of the same size.</EM>\n", blockMaxGap);
    }
if (rcNeedle)
    reverseComplement(needle, needleSize);
/* Scratch buffer allocated at the top is no longer needed. */
freeMem(colorFlags);
return anchorCount;
}
void initGapAid(char *gapFileName)
/* Initialize gap aid structure for faster gap
 * computations. */
{
int i, tableSize, startLong = -1;
char *sizeDesc[2];
char *words[128];

if (gapFileName != NULL)
    {
    struct lineFile *lf = lineFileOpen(gapFileName, TRUE);
    int count;

    lineFileNextRowTab(lf, sizeDesc, 2);
    tableSize = atoi(sizeDesc[1]);
    AllocArray(gapInitPos,tableSize);
    AllocArray(gapInitQGap,tableSize);
    AllocArray(gapInitTGap,tableSize);
    AllocArray(gapInitBothGap,tableSize);
    while (count = lineFileChopNext(lf, words, tableSize+1))
        {
        if (sameString(words[0],"smallSize"))
            {
            aid.smallSize = atoi(words[1]);
            }
        if (sameString(words[0],"position"))
            {
            for (i=0 ; i<count-1 ; i++)
                gapInitPos[i] = atoi(words[i+1]);
            }
        if (sameString(words[0],"qGap"))
            {
            for (i=0 ; i<count-1 ; i++)
                gapInitQGap[i] = atoi(words[i+1]);
            }
        if (sameString(words[0],"tGap"))
            {
            for (i=0 ; i<count-1 ; i++)
                gapInitTGap[i] = atoi(words[i+1]);
            }
        if (sameString(words[0],"bothGap"))
            {
            for (i=0 ; i<count-1 ; i++)
                gapInitBothGap[i] = atoi(words[i+1]);
            }
            
        }
    if (aid.smallSize == 0)
        errAbort("missing smallSize parameter in %s\n",gapFileName);
    lineFileClose(&lf);
    }
else
    {
    /* if no gap file, then setup default values */ 
    /* Set up to handle small values */
    aid.smallSize = 111;
    tableSize = 11;
    AllocArray(gapInitPos,tableSize);
    AllocArray(gapInitQGap,tableSize);
    AllocArray(gapInitTGap,tableSize);
    AllocArray(gapInitBothGap,tableSize);
    for (i = 0 ; i < tableSize ; i++)
        {
        gapInitPos[i] = gapInitPosDefault[i];
        gapInitTGap[i] = gapInitTGapDefault[i];
        gapInitQGap[i] = gapInitQGapDefault[i];
        gapInitBothGap[i] = gapInitBothGapDefault[i];
        }
    }
    AllocArray(aid.qSmall, aid.smallSize);
    AllocArray(aid.tSmall, aid.smallSize);
    AllocArray(aid.bSmall, aid.smallSize);
    for (i=1; i<aid.smallSize; ++i)
        {
        aid.qSmall[i] = 
            interpolate(i, gapInitPos, gapInitQGap, tableSize);
        aid.tSmall[i] = 
            interpolate(i, gapInitPos, gapInitTGap, tableSize);
        aid.bSmall[i] = interpolate(i, gapInitPos, 
            gapInitBothGap, tableSize);
        }

    /* Set up to handle intermediate values. */
    for (i=0; i<tableSize; ++i)
        {
        if (aid.smallSize == gapInitPos[i])
            {
            startLong = i;
            break;
            }
        }
    if (startLong < 0)
        errAbort("No position %d in initGapAid()\n", aid.smallSize);
    aid.longCount = tableSize - startLong;
    aid.qPosCount = tableSize - startLong;
    aid.tPosCount = tableSize - startLong;
    aid.bPosCount = tableSize - startLong;
    aid.longPos = cloneMem(gapInitPos + startLong, aid.longCount * sizeof(int));
    aid.qLong = cloneMem(gapInitQGap + startLong, aid.qPosCount * sizeof(double));
    aid.tLong = cloneMem(gapInitTGap + startLong, aid.tPosCount * sizeof(double));
    aid.bLong = cloneMem(gapInitBothGap + startLong, aid.bPosCount * sizeof(double));

    /* Set up to handle huge values. */
    aid.qLastPos = aid.longPos[aid.qPosCount-1];
    aid.tLastPos = aid.longPos[aid.tPosCount-1];
    aid.bLastPos = aid.longPos[aid.bPosCount-1];
    aid.qLastPosVal = aid.qLong[aid.qPosCount-1];
    aid.tLastPosVal = aid.tLong[aid.tPosCount-1];
    aid.bLastPosVal = aid.bLong[aid.bPosCount-1];
    aid.qLastSlope = calcSlope(aid.qLastPosVal, aid.qLong[aid.qPosCount-2],
                               aid.qLastPos, aid.longPos[aid.qPosCount-2]);
    aid.tLastSlope = calcSlope(aid.tLastPosVal, aid.tLong[aid.tPosCount-2],
                               aid.tLastPos, aid.longPos[aid.tPosCount-2]);
    aid.bLastSlope = calcSlope(aid.bLastPosVal, aid.bLong[aid.bPosCount-2],
                               aid.bLastPos, aid.longPos[aid.bPosCount-2]);
    // uglyf("qLastPos %d, qlastPosVal %f, qLastSlope %f\n", aid.qLastPos, aid.qLastPosVal, aid.qLastSlope);
    // uglyf("tLastPos %d, tlastPosVal %f, tLastSlope %f\n", aid.tLastPos, aid.tLastPosVal, aid.tLastSlope);
    // uglyf("bLastPos %d, blastPosVal %f, bLastSlope %f\n", aid.bLastPos, aid.bLastPosVal, aid.bLastSlope);
}
Exemple #20
0
void outputBlocks(struct lineFile *lf,
	struct block *blockList, int score, FILE *f, boolean isRc, 
	char *qName, int qSize, char *qNibDir, struct dlList *qCache,
	char *tName, int tSize, char *tNibDir, struct dlList *tCache,
	boolean rescore)
/* Output block list as an axt to file f.
 *
 * Does nothing when blockList is NULL.  Otherwise loads the query and
 * target sequence spanning all blocks (from the in-memory fasta lists
 * when qIsFa/tIsFa are set, else through readFromCache), builds the two
 * alignment strings with '-' filling the gap on one side between
 * consecutive blocks, and writes a single axt record.
 *
 * lf is used only to report the line and file name in error messages.
 * isRc means the query is on the reverse strand; the query sequence is
 * reverse-complemented and the record gets qStrand '-'.
 * If rescore is set, score is recomputed with axtScoreSym using
 * scoreScheme instead of using the score passed in.
 * Aborts if a named fasta sequence is missing, if consecutive blocks
 * have a gap on both sides, if a block lies outside the loaded
 * sequence, or if the two alignment strings differ in length. */
{
int qStart = BIGNUM, qEnd = 0, tStart = BIGNUM, tEnd = 0;
struct block *lastBlock = NULL;
struct block *block;
struct dyString *qSym = NULL, *tSym = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seq = NULL;
struct axt axt;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);
boolean qDnaBorrowed = FALSE;  /* TRUE when qSeq->dna points into qFaList */

if (blockList == NULL)
    return;

/* Allocate only once we know there is something to write, so the
 * early return above does not leak the buffers. */
qSym = newDyString(16*1024);
tSym = newDyString(16*1024);

/* Figure overall dimensions. */
for (block = blockList; block != NULL; block = block->next)
    {
    if (qStart > block->qStart) qStart = block->qStart;
    if (qEnd < block->qEnd) qEnd = block->qEnd;
    if (tStart > block->tStart) tStart = block->tStart;
    if (tEnd < block->tEnd) tEnd = block->tEnd;
    }

/* Load query sequence covering alignment.  For a reverse strand
 * alignment the range is flipped for the fetch, then flipped back
 * once the sequence has been loaded. */
if (isRc)
    reverseIntRange(&qStart, &qEnd, qSize);
if (qIsFa)
    {
    for (seq = qFaList ; seq != NULL ; seq = seq->next)
        {
        if (sameString(qName, seq->name))
            break;
        }
    if (seq == NULL)
        errAbort("sequence not found %s\n",qName);
    AllocVar(qSeq);
    qSeq->size = qEnd - qStart;
    qSeq->name = cloneString(qName);
    if (isRc)
        {
        /* Needs a private copy since it is reverse-complemented in place. */
        qSeq->dna = cloneMem((seq->dna)+qStart, qSeq->size);
        }
    else
        {
        qSeq->dna = (seq->dna)+qStart;
        qDnaBorrowed = TRUE;
        }
    }
else
    qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
if (isRc)
    {
    reverseIntRange(&qStart, &qEnd, qSize);
    reverseComplement(qSeq->dna, qSeq->size);
    }

/* Load target sequence covering alignment. */
if (tIsFa)
    {
    for (seq = tFaList ; seq != NULL ; seq = seq->next)
        {
        if (sameString(tName, seq->name))
            break;
        }
    if (seq == NULL)
        errAbort("sequence not found %s\n",tName);
    AllocVar(tSeq);
    tSeq->size = tEnd - tStart;
    tSeq->name = cloneString(tName);
    tSeq->dna = cloneMem((seq->dna)+tStart, tSeq->size);
    }
else
    tSeq = readFromCache(tCache, tNibDir, tName, tStart, tEnd - tStart, tSize, tIsTwoBit);

/* Loop through blocks copying sequence into dynamic strings. */
for (block = blockList; block != NULL; block = block->next)
    {
    if (lastBlock != NULL)
        {
        int qGap = block->qStart - lastBlock->qEnd;
        int tGap = block->tStart - lastBlock->tEnd;
        if (qGap != 0 && tGap != 0)
            {
            errAbort("Gaps in both strand on alignment ending line %d of %s",
                lf->lineIx, lf->fileName);
            }
        /* Unaligned bases on one side are paired with dashes on the other. */
        if (qGap > 0)
            {
            dyStringAppendMultiC(tSym, '-', qGap);
            dyStringAppendN(qSym, qSeq->dna + lastBlock->qEnd - qStart, qGap);
            }
        if (tGap > 0)
            {
            dyStringAppendMultiC(qSym, '-', tGap);
            dyStringAppendN(tSym, tSeq->dna + lastBlock->tEnd - tStart, tGap);
            }
        }
    /* Check both ends of the block, since the copy below reads up to
     * the block end, not just its start. */
    if (qSeq->size < block->qStart - qStart || qSeq->size < block->qEnd - qStart)
        {
        errAbort("read past end of sequence %s size =%d block->qStart-qstart=%d block->qStart=%d qEnd=%d \n", qName, qSeq->size, block->qStart-qStart,block->qStart, block->qEnd );
        }
    dyStringAppendN(qSym, qSeq->dna + block->qStart - qStart,
        block->qEnd - block->qStart);
    if (tSeq->size < block->tStart - tStart || tSeq->size < block->tEnd - tStart)
        {
        errAbort("read past end of sequence %s size =%d block->tStart-tstart=%d\n", tName, tSeq->size, block->tStart-tStart);
        }
    dyStringAppendN(tSym, tSeq->dna + block->tStart - tStart,
        block->tEnd - block->tStart);
    lastBlock = block;
    }
if (qSym->stringSize != tSym->stringSize)
    errAbort("qSize and tSize don't agree in alignment ending line %d of %s",
        lf->lineIx, lf->fileName);

if (rescore)
    score = axtScoreSym(scoreScheme, qSym->stringSize,
                        qSym->string, tSym->string);

/* Fill in an axt and write it to output.  The axt only borrows the
 * names and symbol strings; nothing in it needs freeing. */
ZeroVar(&axt);
axt.qName = qName;
axt.qStart = qStart;
axt.qEnd = qEnd;
axt.qStrand = (isRc ? '-' : '+');
axt.tName = tName;
axt.tStart = tStart;
axt.tEnd = tEnd;
axt.tStrand = '+';
axt.score = score;
axt.symCount = qSym->stringSize;
axt.qSym = qSym->string;
axt.tSym = tSym->string;
axtWrite(&axt, f);

/* Clean up.  Borrowed dna belongs to qFaList, so detach it before
 * releasing the rest of qSeq (struct and cloned name). */
if (qDnaBorrowed)
    qSeq->dna = NULL;
freeDnaSeq(&qSeq);
freeDnaSeq(&tSeq);
dyStringFree(&qSym);
dyStringFree(&tSym);
}