コード例 #1
0
ファイル: altSplice.c プロジェクト: apmagalhaes/kentUtils
void initializeChromNib(char *fileName)
/** Use a local nib sequence file in place of the database:
    record the file name in chromNib, then open it with
    nibOpenVerify, which fills in chromNibFile and chromNibSize. */
{
chromNib = fileName;
nibOpenVerify(chromNib, &chromNibFile, &chromNibSize);
}
コード例 #2
0
ファイル: chainToPsl.c プロジェクト: Nicholas-NVS/kentUtils
FILE *openFromCache(struct dlList *cache, struct seqFilePos *sfp)
/* Return the open handle for sfp->file, going through the cache list.
 * A hit is moved to the head of the list.  On a miss the file is opened
 * and added at the head; if the list already holds maxCacheSize entries
 * the tail entry is closed and freed first, so this only works while
 * callers use no more than that many returned handles at once.
 * For nib files sfp->pos is set to the size reported by nibOpenVerify. */
{
static int maxCacheSize=16;
struct dlNode *el;
struct cachedFile *entry;
int entryCount = 0;
int nibSize;

/* Search for an existing entry, tallying the list length on the way. */
el = cache->head;
while (!dlEnd(el))
    {
    entry = el->val;
    ++entryCount;
    if (sameString(sfp->file, entry->name))
        {
        /* Hit: move to the front so the tail stays least recently used. */
        dlRemove(el);
        dlAddHead(cache, el);
        return entry->f;
        }
    el = el->next;
    }

/* Cache is full: close and discard the entry at the tail. */
if (entryCount >= maxCacheSize)
    {
    el = dlPopTail(cache);
    entry = el->val;
    carefulClose(&entry->f);
    freeMem(entry->name);
    freeMem(entry);
    freeMem(el);
    }

/* Open the file the way its type requires and put it at the head. */
AllocVar(entry);
entry->name = cloneString(sfp->file);
if (sfp->isTwoBit)
    entry->f = (FILE *)twoBitOpen(sfp->file);
else if (sfp->isNib)
    {
    nibOpenVerify(sfp->file, &entry->f, &nibSize);
    if (entry->f == NULL)
        errAbort("can't open nibfile %s\n",sfp->file);
    sfp->pos = nibSize;
    }
else
    entry->f = mustOpen(sfp->file, "rb");
dlAddValHead(cache, entry);
return entry->f;
}
コード例 #3
0
ファイル: dnaLoad.c プロジェクト: Bioinformaticsnl/SimSeq
struct dnaSeq *dnaLoadSingle(char *fileName, int *retStart, int *retEnd, int *retParentSize)
/* Return sequence if it's a nib file or 2bit part, NULL otherwise.
 * retStart, retEnd and retParentSize may each be NULL; when non-NULL they
 * receive the function-level start, end and parentSize values, which are
 * all zero when fileName is neither a nib file nor a 2bit range. */
{
struct dnaSeq *seq = NULL;
unsigned start = 0, end = 0;
int parentSize = 0;
if (nibIsFile(fileName))
    {
    /* Save offset out of fileName for auto-lifting */
    char filePath[PATH_LEN];
    char name[PATH_LEN];
    nibParseName(0, fileName, filePath, name, &start, &end);

    if (end != 0)	/* It's just a range. */
        {
        /* The nib file is opened only to learn the full size; close the
         * handle again so it is not leaked on every ranged load. */
        FILE *f = NULL;
        int size = 0;
        nibOpenVerify(filePath, &f, &size);
        parentSize = size;
        if (f != NULL)
            fclose(f);
        }
    seq =  nibLoadAllMasked(NIB_MASK_MIXED, fileName);
    if (end == 0)
         parentSize = end = seq->size;
    freez(&seq->name);
    seq->name = cloneString(name);
    }
else if (twoBitIsRange(fileName))
    {
    /* Save offset out of fileName for auto-lifting */
    char *rangeSpec = cloneString(fileName);
    /* NOTE(review): these locals shadow the function-level start/end, so
     * *retStart and *retEnd stay 0 on this path.  Left unchanged because
     * callers may depend on it -- confirm whether that is intended. */
    int start, end;
    char *file, *seqName;
    twoBitParseRange(rangeSpec, &file, &seqName, &start, &end);

    /* Load sequence. */
        {
        struct twoBitFile *tbf = twoBitOpen(file);
        parentSize = twoBitSeqSize(tbf, seqName);
        seq = twoBitReadSeqFrag(tbf, seqName, start, end);
        twoBitClose(&tbf);
        }
    if (end == 0)
        end = seq->size;
    freez(&rangeSpec);
    }
if (retStart != NULL)
    *retStart = start;
if (retEnd != NULL)
    *retEnd = end;
if (retParentSize != NULL)
    *retParentSize = parentSize;
return seq;
}
コード例 #4
0
ファイル: hgNibSeq.c プロジェクト: elmargb/kentUtils
void hgNibSeq(char *database, char *destDir, int faCount, char *faNames[])
/* hgNibSeq - convert DNA to nibble-a-base and store location in database.
 * For each of the faCount names in faNames the nib file is
 * destDir/<root>.nib.  When preMadeNib is set the existing nib file is
 * opened just to read its size; otherwise the fa file is read and written
 * out as nib.  One row (chromPrefix+root, size, nib path) is inserted into
 * tableName per input, and the total base count is printed at the end. */
{
char dir[256], name[128], chromName[128], ext[64];
char nibName[512];
struct sqlConnection *conn = sqlConnect(database);
char query[512];
int i;
char *faName;
struct dnaSeq *seq = NULL;
unsigned long total = 0;

if (!strchr(destDir, '/'))
   errAbort("Use full path name for nib file dir\n");

makeDir(destDir);
if ((!appendTbl) || !sqlTableExists(conn, tableName))
    createTable(conn);
for (i=0; i<faCount; ++i)
    {
    /* Start from zero for every file so that a NULL result from
     * faReadDna cannot leave size indeterminate or stale. */
    int size = 0;
    int len;

    faName = faNames[i];
    splitPath(faName, dir, name, ext);
    len = snprintf(nibName, sizeof nibName, "%s/%s.nib", destDir, name);
    if (len < 0 || (size_t)len >= sizeof nibName)
        errAbort("Nib file name too long for %s\n", faName);
    printf("Processing %s to %s\n", faName, nibName);
    if (preMadeNib)
        {
        FILE *nibFile;
        nibOpenVerify(nibName, &nibFile, &size);
        carefulClose(&nibFile);
        }
    else
        {
        seq = faReadDna(faName);
        if (seq != NULL)
            {
            size = seq->size;
            uglyf("Read DNA\n");
            nibWrite(seq, nibName);
            uglyf("Wrote nib\n");
            freeDnaSeq(&seq);
            }
        }
    /* Bounded replacement for strcpy+strcat into chromName. */
    len = snprintf(chromName, sizeof chromName, "%s%s", chromPrefix, name);
    if (len < 0 || (size_t)len >= sizeof chromName)
        errAbort("Chromosome name too long for %s\n", faName);
    sqlSafef(query, sizeof query, "INSERT into %s VALUES('%s', %d, '%s')",
        tableName, chromName, size, nibName);
    sqlUpdate(conn,query);
    total += size;
    }
sqlDisconnect(&conn);
printf("%lu total bases\n", total);
}
コード例 #5
0
ファイル: correctEst.c プロジェクト: elmargb/kentUtils
struct dnaSeq *readCachedNib(struct hash *nibHash, char *nibDir,
	char *chrom, int start, int size)
/* Return sequence using cache of nibs.
 * nibHash maps chrom to an open nibInfo.  On a miss nibDir/chrom.nib is
 * opened and stored in the hash so later calls reuse the same handle.
 * The result comes from nibLdPart for the given start and size. */
{
struct nibInfo *ni = hashFindVal(nibHash, chrom);
if (ni == NULL)
    {
    char fileName[512];
    /* Bounded format: an over-long path gets truncated (and then fails
     * to open) rather than overrunning the buffer. */
    snprintf(fileName, sizeof(fileName), "%s/%s.nib", nibDir, chrom);
    AllocVar(ni);
    ni->fileName = cloneString(fileName);
    nibOpenVerify(fileName, &ni->f, &ni->size);
    /* Remember the open nib; without this every call reopened the file
     * and leaked both the struct and the handle. */
    hashAdd(nibHash, chrom, ni);
    }
return nibLdPart(ni->fileName, ni->f, ni->size, start, size);
}
コード例 #6
0
ファイル: pslPretty.c プロジェクト: SHuang-Broad/SnowTools
void addNib(char *file, struct hash *fileHash, struct hash *seqHash)
/* Add a nib file to hashes.
 * A new seqFilePos is stored in seqHash under the root name of file; its
 * file field points at the name kept in fileHash, isNib is set, and pos
 * is set to the size reported by nibOpenVerify. */
{
struct seqFilePos *sfp;
char root[128];
int size;
FILE *f = NULL;
splitPath(file, NULL, root, NULL);
AllocVar(sfp);
hashAddSaveName(seqHash, root, sfp, &sfp->name);
sfp->file = hashStoreName(fileHash, file);
sfp->isNib = TRUE;
nibOpenVerify(file, &f, &size);
sfp->pos = size;
/* The file was opened only to read its size; close it so that adding
 * many nib files does not exhaust file handles. */
if (f != NULL)
    fclose(f);
}
コード例 #7
0
ファイル: axtLib.c プロジェクト: blumroy/kentUtils
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize,
	struct hash *qChromHash, char *nibDir,
	struct chain *chain, boolean swap)
/* Convert the part of chain covered by fill into a list of axt records.
 * The query sequence comes from nibDir/<qName>.nib; opened nibs are kept
 * in qChromHash for reuse.  When swap is true, query and target are
 * swapped in every axt returned.  Returns NULL when the chain has no
 * subset inside the fill's target range. */
{
struct nibInfo *nib = hashFindVal(qChromHash, fill->qName);
boolean isRev = (chain->qStrand == '-');
struct chain *subChain, *chainToFree;
struct axt *axtList = NULL, *axt;
struct dnaSeq *qSeq;
int qOffset;

/* Open the query nib on first use and remember it in the hash. */
if (nib == NULL)
    {
    char path[512];
    AllocVar(nib);
    safef(path, sizeof(path), "%s/%s.nib", nibDir, fill->qName);
    nib->fileName = cloneString(path);
    nibOpenVerify(path, &nib->f, &nib->size);
    hashAdd(qChromHash, fill->qName, nib);
    }

/* Fetch the query fragment; on the minus strand reverse-complement it
 * and measure the offset from the other end of the sequence. */
qSeq = nibLoadPartMasked(NIB_MASK_MIXED, nib->fileName,
	fill->qStart, fill->qSize);
qOffset = fill->qStart;
if (isRev)
    {
    reverseComplement(qSeq->dna, qSeq->size);
    qOffset = nib->size - (fill->qStart + fill->qSize);
    }

chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize,
	&subChain, &chainToFree);
if (subChain != NULL)
    {
    axtList = chainToAxt(subChain, qSeq, qOffset, tChrom, fill->tStart, 100, BIGNUM);
    if (swap)
        {
        axt = axtList;
        while (axt != NULL)
            {
            axtSwap(axt, tSize, nib->size);
            axt = axt->next;
            }
        }
    }
chainFree(&chainToFree);
freeDnaSeq(&qSeq);
return axtList;
}
コード例 #8
0
struct cachedSeqFile *openNibFromCache(struct dlList *cache, char *dirName, char *seqName)
/* Return the cache entry for seqName, opening dirName/seqName.nib on a
 * miss.  A hit is moved to the head of the list; once the list holds
 * maxCacheSize entries the tail entry is freed before a new one is
 * added at the head. */
{
static int maxCacheSize=32;
struct dlNode *el;
struct cachedSeqFile *entry;
char path[512];
int entryCount = 0;

/* Search for an existing entry, tallying the list length on the way. */
el = cache->head;
while (!dlEnd(el))
    {
    entry = el->val;
    ++entryCount;
    if (sameString(seqName, entry->name))
        {
        /* Hit: move to the front so the tail stays least recently used. */
        dlRemove(el);
        dlAddHead(cache, el);
        return entry;
        }
    el = el->next;
    }

/* Cache is full: discard the entry at the tail. */
if (entryCount >= maxCacheSize)
    {
    el = dlPopTail(cache);
    entry = el->val;
    cachedSeqFileFree(&entry);
    freeMem(el);
    }

/* Build a new entry, open its nib file and put it at the head. */
AllocVar(entry);
entry->name = cloneString(seqName);
snprintf(path, sizeof(path), "%s/%s.nib", dirName, seqName);
entry->fileName = cloneString(path);
nibOpenVerify(path, &entry->f, &entry->size);
dlAddValHead(cache, entry);
return entry;
}
コード例 #9
0
struct hash *makeOtherHash(char *database, char *table)
/* Make otherSeq valued hash of other sequences.
 * Reads (chrom, fileName) rows from table in database.  For each row an
 * otherSeq is stored in the hash under the chrom name, with nibFile set
 * to a copy of fileName and f/chromSize filled in by nibOpenVerify. */
{
    char query[256];
    struct hash *hash = newHash(7);
    struct sqlConnection *conn = hAllocConn(database);
    struct sqlResult *sr;
    char **row;
    struct otherSeq *os;

    /* Bounded format so a long table name cannot overrun query. */
    snprintf(query, sizeof(query), "select chrom,fileName from %s", table);
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        AllocVar(os);
        hashAddSaveName(hash, row[0], os, &os->name);
        os->nibFile = cloneString(row[1]);
        nibOpenVerify(os->nibFile, &os->f, &os->chromSize);
    }
    /* Release the result set before handing the connection back. */
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    return hash;
}
コード例 #10
0
void gcSquiggle(char *chromName, char *destDir, char *type, bool thick, bool line)
/* Draw the GC-content picture for one chromosome and save it as
 * destDir/<chromName>gc<type>.gif.  Sequence is read from
 * nibDir/<chromName>.nib, whose size must match the database size.
 * One point is plotted per image column from the bases that column
 * covers; columns with fewer than 20 real bases are skipped.  With line
 * set, a point is joined to the previous column's point when that column
 * had data; with thick set, everything is drawn a second time one pixel
 * lower. */
{
char gifName[512];
char nibName[512];
int chromSize = hChromSize(chromName);
struct memGfx *mg = getScaledMg(chromSize, squiggleHeight);
int dotWidth = mg->width;
int squigHeight = squiggleHeight-thick;
double prevGcPercent = (gcPercentMin+gcPercentMax)/2;
bool prevMissing = TRUE;
FILE *nibFile;
int nibChromSize;
int rangeEnd = 0;
int dot;

sprintf(gifName, "%s/%sgc%s.gif", destDir, chromName, type);
sprintf(nibName, "%s/%s.nib", nibDir, chromName);
nibOpenVerify(nibName, &nibFile, &nibChromSize);
if (nibChromSize != chromSize)
    errAbort("Disagreement on size of %s between database and %s\n",
    	chromName, nibName);

for (dot = 0; dot < dotWidth; ++dot)
    {
    /* Each column covers the bases from where the previous column ended
     * up to dotToBase(dot+1), capped at the end of the nib file. */
    int rangeStart = rangeEnd;
    struct dnaSeq *seq;
    int realCount, gcCount;

    rangeEnd = dotToBase(dot+1);
    if (rangeEnd > nibChromSize)
        rangeEnd = nibChromSize;
    seq = nibLdPart(nibName, nibFile, nibChromSize, rangeStart,
    	rangeEnd - rangeStart);
    realCount = realDnaCount(seq->dna, seq->size);
    gcCount = gcDnaCount(seq->dna, seq->size);
    if (realCount >= 20)
        {
        double gcPercent = (double)gcCount/(double)realCount;
        int y = gcScaleRange(gcPercent, squigHeight);
        if (line && !prevMissing)
            {
            int prevY = gcScaleRange(prevGcPercent, squigHeight);
            mgDrawLine(mg, dot-1, prevY, dot, y, MG_BLACK);
            if (thick)
                mgDrawLine(mg, dot-1, prevY+1, dot, y+1, MG_BLACK);
            }
        else
            {
            mgPutDot(mg, dot, y, MG_BLACK);
            if (thick)
                mgPutDot(mg, dot, y+1, MG_BLACK);
            }
        prevGcPercent = gcPercent;
        prevMissing = FALSE;
        }
    else
        {
        /* Too few real bases in this column: leave a gap. */
        prevMissing = TRUE;
        }
    freeDnaSeq(&seq);
    }
fclose(nibFile);
mgSaveGif(mg, gifName);
mgFree(&mg);
}
コード例 #11
0
void loadUpDnaSeqs(struct coordConvRep *ccr)
/* Load the three pieces of DNA (upstream, downstream, middle) that are
 * going to be aligned, storing each sequence and its start in ccr.
 * The nib file is looked up in chromInfo for ccr->from->chrom, and every
 * piece is ccr->seqSize bases long. */
{
char query[128];
char nibFileName[512];
struct sqlConnection *conn = sqlConnect(ccr->from->version);
FILE *nib = NULL;
int chromSize;
int chromStart, chromEnd;
int querySize, midPos;
int upStart, downStart, midStart;

nibFileName[0] = '\0';
safef(query, sizeof(query), "select fileName from chromInfo where chrom='%s'",
      ccr->from->chrom);
sqlQuickQuery(conn, query, nibFileName, sizeof(nibFileName));
if(strlen(nibFileName) == 0)
    errAbort("coordConv::loadUpDnaSeqs() - can't find file for chromosome %s.", ccr->from->chrom);
nibOpenVerify(nibFileName, &nib, &chromSize);

chromStart = ccr->from->chromStart;
chromEnd = ccr->from->chromEnd;
querySize = chromEnd - chromStart;
midPos = (chromEnd + chromStart)/2;

/* Only the raw start of the two end pieces depends on the region size.
 * For regions under 6000 bases they are placed relative to the midpoint
 * (3000 before, 2000 after); otherwise the upstream piece starts at
 * chromStart and the downstream piece 1000 bases before chromEnd. */
if(querySize < 6000)
    {
    upStart = midPos - 3000;
    downStart = midPos + 2000;
    }
else
    {
    upStart = chromStart;
    downStart = chromEnd - 1000;
    }

/* Upstream (5') piece: clamped at the start of the chromosome only. */
if(upStart < 0)
    upStart = 0;
ccr->upSeq = nibLdPart(nibFileName, nib, chromSize, upStart, ccr->seqSize);
ccr->upStart = upStart;

/* Downstream (3') piece: clamped at the end of the chromosome only. */
if(downStart > (chromSize - ccr->seqSize))
    downStart = chromSize - ccr->seqSize;
ccr->downSeq = nibLdPart(nibFileName, nib, chromSize, downStart, ccr->seqSize);
ccr->downStart = downStart;

/* Middle piece: centered on the midpoint, clamped at both ends. */
midStart = midPos - (ccr->seqSize/2);
if(midStart < 0)
    midStart = 0;
if(midStart > (chromSize - ccr->seqSize))
    midStart = chromSize - ccr->seqSize;
ccr->midSeq = nibLdPart(nibFileName, nib, chromSize, midStart, ccr->seqSize);
ccr->midStart = midStart;

carefulClose(&nib);
sqlDisconnect(&conn);
}
コード例 #12
0
ファイル: bedDown.c プロジェクト: blumroy/kentUtils
void bedDown(char *database, char *table, char *faName, char *tabName)
/* bedDown - Make stuff to find a BED format submission in a new version.
 * Reads the rows of table ordered by chrom,chromStart.  For each row a
 * window of up to 400 bases around the item midpoint is written to
 * faName, and a tab-separated line describing the item, the window and
 * the AGP fragment covering the midpoint is written to tabName.
 * Sequence is read from the nib file listed in chromInfo, a chunk at a
 * time, and the chunk is reused while it covers the next window. */
{
char query[512];
struct sqlConnection *conn = sqlConnect(database);
struct sqlConnection *conn2 = sqlConnect(database);
struct sqlResult *sr;
char **row;
struct bed bed;
static char nibChrom[64];
int nibStart = 0;
int nibSize = 0;
int nibEnd = 0;
int nibTargetSize = 512*1024;
struct dnaSeq *nibSeq = NULL;
int midPos;
int chromSize = 0;
int s, e, sz;
FILE *fa = mustOpen(faName, "w");
FILE *tab = mustOpen(tabName, "w");
FILE *nib = NULL;
char nibFileName[512];
char seqName[512];
struct agpFrag *chromFragList = NULL, *frag, *fragsLeft = NULL;
int fragPos;
char *destName;
char *destStrand;


sqlSafef(query, sizeof query, "select chrom,chromStart,chromEnd,name from %s order by chrom,chromStart", table);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    bedStaticLoad(row, &bed);

    /* Fix badly cased Exofish chromosomes mostly... */
    if (sameString(bed.chrom, "chrna_random"))
        bed.chrom = "chrNA_random";
    else if (sameString(bed.chrom, "chrul_random"))
        bed.chrom = "chrUL_random";
    else if (sameString(bed.chrom, "chrx_random"))
        bed.chrom = "chrX_random";
    else if (sameString(bed.chrom, "chry_random"))
        bed.chrom = "chrY_random";
    else if (sameString(bed.chrom, "chrx"))
        bed.chrom = "chrX";
    else if (sameString(bed.chrom, "chry"))
        bed.chrom = "chrY";

    /* New chromosome: switch to its nib file and AGP fragment list. */
    if (!sameString(bed.chrom, nibChrom))
        {
        /* Bounded copy.  A name that does not fit is truncated, which
         * only means this branch runs again on the next row. */
        snprintf(nibChrom, sizeof nibChrom, "%s", bed.chrom);
        nibSize = nibStart = nibEnd = 0;
        sqlSafef(query, sizeof query, "select fileName from chromInfo where chrom = '%s'", bed.chrom);
        /* Start from an empty name so a query with no row does not
         * leave the buffer uninitialized. */
        nibFileName[0] = '\0';
        sqlQuickQuery(conn2, query, nibFileName, sizeof(nibFileName));
        carefulClose(&nib);
        nibOpenVerify(nibFileName, &nib, &chromSize);
        agpFragFreeList(&chromFragList);
        chromFragList = fragsLeft = loadChromAgp(conn2, bed.chrom);
        printf("%s has %d bases in %d fragments\n", nibFileName, chromSize, slCount(chromFragList));
        }

    /* Window of 200 bases either side of the midpoint, clipped to the
     * chromosome. */
    midPos = (bed.chromStart + bed.chromEnd)/2;
    s = midPos - 200;
    if (s < 0) s = 0;
    e = midPos + 200;
    if (e > chromSize) e = chromSize;
    sz = e-s;

    /* Reload when the buffered sequence does not cover the whole window. */
    if (rangeIntersection(s,e,nibStart,nibEnd) < sz)
        {
        freeDnaSeq(&nibSeq);
        nibStart = s;
        nibSize = nibTargetSize;
        if (nibSize < sz)
            nibSize = sz;
        nibEnd = nibStart + nibSize;
        if (nibEnd > chromSize)
            {
            nibEnd = chromSize;
            nibSize = nibEnd - nibStart;
            }
        nibSeq = nibLdPart(nibFileName, nib, chromSize, nibStart, nibSize);
        }
    if (findCoveringFrag(midPos, &fragsLeft, &frag, &fragPos))
        {
        destName = frag->frag;
        destStrand = frag->strand;
        }
    else
        {
        destName = "?";
        fragPos = 0;
        destStrand = "+";
        warn("Couldn't find %s@%s:%d in agpFrag list", bed.name, bed.chrom, midPos);
        }
    fprintf(tab, "%s\t%s\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%d\n", 
            bed.name, database, bed.chrom, bed.chromStart, 
            bed.chromEnd - bed.chromStart, s - bed.chromStart, e-bed.chromStart,
            destName, destStrand, fragPos);
    snprintf(seqName, sizeof seqName, "%s.%s.%s.%d", bed.name, database, bed.chrom, 
        bed.chromStart);
    faWriteNext(fa, seqName, nibSeq->dna + s - nibStart, sz);
    }
freeDnaSeq(&nibSeq);
agpFragFreeList(&chromFragList);
sqlFreeResult(&sr);
sqlDisconnect(&conn);
sqlDisconnect(&conn2);
/* Close the last nib file and both outputs, which were left open before. */
carefulClose(&nib);
carefulClose(&tab);
carefulClose(&fa);
}