示例#1
0
struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range, 
	struct hash *tFileCache, char *tSeqDir, int querySize, 
	int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
/* Widen range by expansion bases on each side, then load and return the
 * target sequence covering the widened range.  The two steps are combined
 * because the total target size, needed for the widening, is only known
 * once the target file has been looked up.  Open nib/twoBit handles are
 * kept in tFileCache keyed by file path.  The total size of the target
 * sequence is stored in *retTotalSeqSize.  When isRc is set the range is
 * flipped to the reverse strand for the expansion, the loaded DNA is
 * reverse-complemented, and the range is flipped back afterwards. */
{
char path[PATH_LEN+256];
struct dnaSeq *seq = NULL;
int totalSize = 0;

safef(path, sizeof(path), "%s/%s", tSeqDir, range->tName);
if (!nibIsFile(path))
    {
    /* Not a nib: the name is expected to look like "file.2bit:seqName". */
    struct twoBitFile *tbf = NULL;
    char *seqName = NULL;
    char *colon = strchr(path, ':');
    if (colon == NULL)
        errAbort("No colon in .2bit response from gfServer");
    /* Split in place so that path holds just the file part. */
    *colon = 0;
    seqName = colon + 1;
    tbf = hashFindVal(tFileCache, path);
    if (tbf == NULL)
        {
        tbf = twoBitOpen(path);
        hashAdd(tFileCache, path, tbf);
        }
    totalSize = twoBitSeqSize(tbf, seqName);
    if (isRc)
        reverseIntRange(&range->tStart, &range->tEnd, totalSize);
    gfiExpandRange(range, querySize, totalSize, respectFrame, isRc, expansion);
    seq = twoBitReadSeqFragLower(tbf, seqName, range->tStart, range->tEnd);
    }
else
    {
    struct nibInfo *nib = hashFindVal(tFileCache, path);
    if (nib == NULL)
        {
        nib = nibInfoNew(path);
        hashAdd(tFileCache, path, nib);
        }
    totalSize = nib->size;
    if (isRc)
        reverseIntRange(&range->tStart, &range->tEnd, totalSize);
    gfiExpandRange(range, querySize, totalSize, respectFrame, isRc, expansion);
    seq = nibLdPart(path, nib->f, totalSize, 
        range->tStart, range->tEnd - range->tStart);
    }

if (isRc)
    {
    /* Hand back reverse-strand DNA, but restore the range coordinates
     * that were flipped before the expansion. */
    reverseComplement(seq->dna, seq->size);
    reverseIntRange(&range->tStart, &range->tEnd, totalSize);
    }
*retTotalSeqSize = totalSize;
return seq;
}
示例#2
0
struct dnaSeq *nibTwoCacheSeq(struct nibTwoCache *ntc, char *seqName)
/* Return all of sequence. This will have repeats in lower case. */
{
if (ntc->isTwoBit)
    return twoBitReadSeqFrag(ntc->tbf, seqName, 0, 0);
else
    {
    struct nibInfo *nib = nibInfoFromCache(ntc->nibHash, ntc->pathName, seqName);
    return nibLdPart(nib->fileName, nib->f, nib->size, 0, nib->size);
    }
}
示例#3
0
struct dnaSeq *dnaFromChrom(char *db, char *chrom, int chromStart, int chromEnd, enum dnaCase seqCase)
/** Fetch the DNA for chrom:chromStart-chromEnd.  If a nib file has been
 * configured (chromNib is set) the region is read from that file and
 * db, chrom and seqCase are not consulted; otherwise the request is
 * passed on to hDnaFromSeq. */
{
if (chromNib == NULL)
    return hDnaFromSeq(db, chrom, chromStart, chromEnd, seqCase);

return (struct dnaSeq *)nibLdPart(chromNib, chromNibFile, chromNibSize,
                                  chromStart, chromEnd - chromStart);
}
示例#4
0
struct dnaSeq *readCachedNib(struct hash *nibHash, char *nibDir,
	char *chrom, int start, int size)
/* Return size bases of chrom beginning at start, read from the file
 * nibDir/chrom.nib.  Opened nib files are remembered in nibHash, keyed
 * by chrom, so that each file is opened and verified only on the first
 * request for that chromosome. */
{
struct nibInfo *ni = hashFindVal(nibHash, chrom);
if (ni == NULL)
    {
    char fileName[512];
    /* Size-bounded formatting: nibDir and chrom are caller supplied. */
    safef(fileName, sizeof(fileName), "%s/%s.nib", nibDir, chrom);
    AllocVar(ni);
    ni->fileName = cloneString(fileName);
    nibOpenVerify(fileName, &ni->f, &ni->size);
    /* Record the entry; without this every call would miss the cache,
     * reopen the file and leak the previous handle and nibInfo. */
    hashAdd(nibHash, chrom, ni);
    }
return nibLdPart(ni->fileName, ni->f, ni->size, start, size);
}
struct dnaSeq *loadSomeSeq(struct hash *otherHash, char *chrom, int start, int end)
/* Read bases start..end of chrom from the nib file recorded for it in
 * otherHash.  If chrom has no entry in the hash a warning is issued and
 * NULL is returned. */
{
    struct otherSeq *os = hashFindVal(otherHash, chrom);

    if (os == NULL)
    {
        warn("Sequence %s isn't a chromsome", chrom);
        return NULL;
    }
    return nibLdPart(os->nibFile, os->f, os->chromSize, start, end - start);
}
示例#6
0
void gcSquiggle(char *chromName, char *destDir, char *type, bool thick, bool line)
/* Draw the GC-content squiggle for one chromosome and save it as
 * destDir/<chromName>gc<type>.gif.  Each pixel column ("dot") covers the
 * bases between dotToBase(dot) and dotToBase(dot+1); the GC fraction of
 * the real bases in that stretch sets the y position.  With line set,
 * consecutive columns are joined by line segments, otherwise single dots
 * are plotted.  With thick set, everything is drawn a second time one
 * pixel lower. */
{
char gifName[512], nibName[512];
FILE *nibFile;
int nibChromSize;
int chromSize = hChromSize(chromName);
struct memGfx *mg = getScaledMg(chromSize, squiggleHeight);
int dotWidth = mg->width;
int squigHeight = squiggleHeight-thick;
double prevGcPercent = (gcPercentMin+gcPercentMax)/2;
bool prevMissing = TRUE;
int binEnd = 0;
int dot;

sprintf(gifName, "%s/%sgc%s.gif", destDir, chromName, type);
sprintf(nibName, "%s/%s.nib", nibDir, chromName);
nibOpenVerify(nibName, &nibFile, &nibChromSize);
if (nibChromSize != chromSize)
    errAbort("Disagreement on size of %s between database and %s\n",
        chromName, nibName);

for (dot = 0; dot < dotWidth; ++dot)
    {
    /* Each column starts where the previous one ended. */
    int binStart = binEnd;
    struct dnaSeq *seq = NULL;
    int realBaseCount, gcBaseCount;
    double gcPercent;
    int y1, y2;

    binEnd = dotToBase(dot+1);
    if (binEnd > nibChromSize)
        binEnd = nibChromSize;
    seq = nibLdPart(nibName, nibFile, nibChromSize, binStart, binEnd - binStart);
    realBaseCount = realDnaCount(seq->dna, seq->size);
    gcBaseCount = gcDnaCount(seq->dna, seq->size);
    /* Only the two counts are needed from here on. */
    freeDnaSeq(&seq);

    if (realBaseCount < 20)
        {
        /* Too few real bases for a meaningful percentage: plot nothing
         * here and do not connect a line from this column. */
        prevMissing = TRUE;
        continue;
        }

    gcPercent = (double)gcBaseCount/(double)realBaseCount;
    y2 = gcScaleRange(gcPercent, squigHeight);
    if (!line || prevMissing)
        {
        mgPutDot(mg, dot, y2, MG_BLACK);
        if (thick)
            mgPutDot(mg, dot, y2+1, MG_BLACK);
        }
    else
        {
        y1 = gcScaleRange(prevGcPercent, squigHeight);
        mgDrawLine(mg, dot-1, y1, dot, y2, MG_BLACK);
        if (thick)
            mgDrawLine(mg, dot-1, y1+1, dot, y2+1, MG_BLACK);
        }
    prevGcPercent = gcPercent;
    prevMissing = FALSE;
    }
fclose(nibFile);
mgSaveGif(mg, gifName);
mgFree(&mg);
}
void loadUpDnaSeqs(struct coordConvRep *ccr)
/* Load the three pieces of DNA that will be aligned: one near the
 * upstream (5') end, one near the downstream (3') end and one from the
 * middle of the region described by ccr->from.  Each piece is
 * ccr->seqSize bases long; the pieces and their start positions are
 * stored in ccr->upSeq/upStart, downSeq/downStart and midSeq/midStart.
 * The nib file is looked up in the chromInfo table of the database
 * named by ccr->from->version. */
{
char query[128];
char nibFileName[512];
struct sqlConnection *conn = sqlConnect(ccr->from->version);
FILE *nib = NULL;
int chromSize;
int chromStart, chromEnd, querySize, midPos;
int seqSize, maxStart;
int upStart, downStart, midStart;

nibFileName[0] = '\0';
safef(query, sizeof(query), "select fileName from chromInfo where chrom='%s'",
      ccr->from->chrom);
sqlQuickQuery(conn, query, nibFileName, sizeof(nibFileName));
if (nibFileName[0] == '\0')
    errAbort("coordConv::loadUpDnaSeqs() - can't find file for chromosome %s.", ccr->from->chrom);
nibOpenVerify(nibFileName, &nib, &chromSize);

chromStart = ccr->from->chromStart;
chromEnd = ccr->from->chromEnd;
querySize = chromEnd - chromStart;
midPos = (chromEnd + chromStart)/2;
seqSize = ccr->seqSize;
/* Largest start that still leaves room for seqSize bases. */
maxStart = chromSize - seqSize;

/* Regions shorter than 6kb are sampled at fixed offsets around the
 * midpoint; longer regions are sampled at the region's own start and
 * 1000 bases before its end. */
if (querySize < 6000)
    {
    upStart = midPos - 3000;
    downStart = midPos + 2000;
    }
else
    {
    upStart = chromStart;
    downStart = chromEnd - 1000;
    }

/* Upstream (5') seq: kept from running off the chromosome start. */
if (upStart < 0)
    upStart = 0;
ccr->upSeq = nibLdPart(nibFileName, nib, chromSize, upStart, seqSize);
ccr->upStart = upStart;

/* Downstream (3') seq: kept from running off the chromosome end. */
if (downStart > maxStart)
    downStart = maxStart;
ccr->downSeq = nibLdPart(nibFileName, nib, chromSize, downStart, seqSize);
ccr->downStart = downStart;

/* Middle seq: centred on the midpoint, clamped on both sides. */
midStart = midPos - (seqSize/2);
if (midStart < 0)
    midStart = 0;
if (midStart > maxStart)
    midStart = maxStart;
ccr->midSeq = nibLdPart(nibFileName, nib, chromSize, midStart, seqSize);
ccr->midStart = midStart;

carefulClose(&nib);
sqlDisconnect(&conn);
}
示例#8
0
void bedDown(char *database, char *table, char *faName, char *tabName)
/* bedDown - Make stuff to find a BED format submission in a new version.
 * For every row of table (read in chrom, chromStart order) this takes a
 * window reaching 200 bases to either side of the item's midpoint,
 * clipped to the chromosome, and:
 *   - writes that window's DNA to faName as a fasta record named
 *     <name>.<database>.<chrom>.<chromStart>;
 *   - writes a tab-separated line to tabName giving the item, the
 *     window offsets relative to chromStart, and the AGP fragment
 *     (name, strand, position) found covering the midpoint.
 * DNA is read from the chromosome's nib file in large chunks that are
 * reused for following items while they still cover the window. */
{
char query[512];
struct sqlConnection *conn = sqlConnect(database);
struct sqlConnection *conn2 = sqlConnect(database);
struct sqlResult *sr;
char **row;
struct bed bed;
/* Chromosome whose nib file and AGP list are currently loaded.  Kept
 * per call (not static) so it can never disagree with nib, which also
 * starts out empty on every call. */
char nibChrom[64] = "";
int nibStart = 0;
int nibSize = 0;
int nibEnd = 0;
int nibTargetSize = 512*1024;	/* Preferred size of a cached chunk. */
struct dnaSeq *nibSeq = NULL;
int midPos;
int chromSize = 0;
int s, e, sz;
FILE *fa = mustOpen(faName, "w");
FILE *tab = mustOpen(tabName, "w");
FILE *nib = NULL;
char nibFileName[512];
char seqName[512];
struct agpFrag *chromFragList = NULL, *frag, *fragsLeft = NULL;
int fragPos;
char *destName;
char *destStrand;


sqlSafef(query, sizeof query, "select chrom,chromStart,chromEnd,name from %s order by chrom,chromStart", table);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    bedStaticLoad(row, &bed);

    /* Fix badly cased Exofish chromosomes mostly... */
    if (sameString(bed.chrom, "chrna_random"))
        bed.chrom = "chrNA_random";
    else if (sameString(bed.chrom, "chrul_random"))
        bed.chrom = "chrUL_random";
    else if (sameString(bed.chrom, "chrx_random"))
        bed.chrom = "chrX_random";
    else if (sameString(bed.chrom, "chry_random"))
        bed.chrom = "chrY_random";
    else if (sameString(bed.chrom, "chrx"))
        bed.chrom = "chrX";
    else if (sameString(bed.chrom, "chry"))
        bed.chrom = "chrY";

    /* Switch nib file and AGP list on a new chromosome.  The nib == NULL
     * test makes sure the very first row always opens a file, whatever
     * its chromosome name is. */
    if (nib == NULL || !sameString(bed.chrom, nibChrom))
        {
        /* NOTE(review): unbounded copy into a 64 byte buffer - assumes
         * chromosome names are shorter than that; confirm. */
        strcpy(nibChrom, bed.chrom);
        /* An empty cached range forces a reload below. */
        nibSize = nibStart = nibEnd = 0;
        sqlSafef(query, sizeof query, "select fileName from chromInfo where chrom = '%s'", bed.chrom);
        sqlQuickQuery(conn2, query, nibFileName, sizeof(nibFileName));
        carefulClose(&nib);
        nibOpenVerify(nibFileName, &nib, &chromSize);
        agpFragFreeList(&chromFragList);
        chromFragList = fragsLeft = loadChromAgp(conn2, bed.chrom);
        printf("%s has %d bases in %d fragments\n", nibFileName, chromSize, slCount(chromFragList));
        }

    /* Window of up to 400 bases centred on the item, clipped to the
     * chromosome. */
    midPos = (bed.chromStart + bed.chromEnd)/2;
    s = midPos - 200;
    if (s < 0) s = 0;
    e = midPos + 200;
    if (e > chromSize) e = chromSize;
    sz = e-s;

    /* Load a fresh chunk starting at the window when the overlap between
     * the window and the cached chunk is smaller than the window. */
    if (rangeIntersection(s,e,nibStart,nibEnd) < sz)
        {
        freeDnaSeq(&nibSeq);
        nibStart = s;
        nibSize = nibTargetSize;
        if (nibSize < sz)
            nibSize = sz;
        nibEnd = nibStart + nibSize;
        if (nibEnd > chromSize)
            {
            nibEnd = chromSize;
            nibSize = nibEnd - nibStart;
            }
        nibSeq = nibLdPart(nibFileName, nib, chromSize, nibStart, nibSize);
        }

    if (findCoveringFrag(midPos, &fragsLeft, &frag, &fragPos))
        {
        destName = frag->frag;
        destStrand = frag->strand;
        }
    else
        {
        /* No fragment found: emit placeholder values and carry on. */
        destName = "?";
        fragPos = 0;
        destStrand = "+";
        warn("Couldn't find %s@%s:%d in agpFrag list", bed.name, bed.chrom, midPos);
        }
    fprintf(tab, "%s\t%s\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%d\n", 
            bed.name, database, bed.chrom, bed.chromStart, 
            bed.chromEnd - bed.chromStart, s - bed.chromStart, e-bed.chromStart,
            destName, destStrand, fragPos);
    sprintf(seqName, "%s.%s.%s.%d", bed.name, database, bed.chrom, 
        bed.chromStart);
    /* The window's offset inside the cached chunk is s - nibStart. */
    faWriteNext(fa, seqName, nibSeq->dna + s - nibStart, sz);
    }

/* Release everything acquired above, including both output files, the
 * last open nib file and the last AGP fragment list. */
freeDnaSeq(&nibSeq);
agpFragFreeList(&chromFragList);
carefulClose(&nib);
carefulClose(&tab);
carefulClose(&fa);
sqlFreeResult(&sr);
sqlDisconnect(&conn);
sqlDisconnect(&conn2);
}