Example #1
0
void gfiGetSeqName(char *spec, char *name, char *file)
/* Extract sequence name and optionally file name from spec, 
 * which includes nib and 2bit files.  (The file may be NULL
 * if you don't care.) */
{
if (nibIsFile(spec))
    {
    splitPath(spec, NULL, name, NULL);
    if (file != NULL)
        strcpy(file, spec);
    }
else
    {
    char *s = strchr(spec, ':');
    if (s == NULL)
	errAbort("Expecting colon in %s", spec);
    strcpy(name, s+1);
    if (file != NULL)
        {
	int fileNameSize = s - spec;
	memcpy(file, spec, fileNameSize);
	file[fileNameSize] = 0;
	}
    }
}
Example #2
0
struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range, 
	struct hash *tFileCache, char *tSeqDir, int querySize, 
	int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
/* Expand range to cover an additional expansion bases on either side.
 * Load up target sequence and return. (Done together because don't
 * know target size before loading.) */
{
struct dnaSeq *target = NULL;
char fileName[PATH_LEN+256];

safef(fileName, sizeof(fileName), "%s/%s", tSeqDir, range->tName);
if (nibIsFile(fileName))
    {
    struct nibInfo *nib = hashFindVal(tFileCache, fileName);
    if (nib == NULL)
        {
	nib = nibInfoNew(fileName);
	hashAdd(tFileCache, fileName, nib);
	}
    if (isRc)
	reverseIntRange(&range->tStart, &range->tEnd, nib->size);
    gfiExpandRange(range, querySize, nib->size, respectFrame, isRc, expansion);
    target = nibLdPart(fileName, nib->f, nib->size, 
    	range->tStart, range->tEnd - range->tStart);
    if (isRc)
	{
	reverseComplement(target->dna, target->size);
	reverseIntRange(&range->tStart, &range->tEnd, nib->size);
	}
    *retTotalSeqSize = nib->size;
    }
else
    {
    struct twoBitFile *tbf = NULL;
    char *tSeqName = strchr(fileName, ':');
    int tSeqSize = 0;
    if (tSeqName == NULL)
        errAbort("No colon in .2bit response from gfServer");
    *tSeqName++ = 0;
    tbf = hashFindVal(tFileCache, fileName);
    if (tbf == NULL)
        {
	tbf = twoBitOpen(fileName);
	hashAdd(tFileCache, fileName, tbf);
	}
    tSeqSize = twoBitSeqSize(tbf, tSeqName);
    if (isRc)
	reverseIntRange(&range->tStart, &range->tEnd, tSeqSize);
    gfiExpandRange(range, querySize, tSeqSize, respectFrame, isRc, expansion);
    target = twoBitReadSeqFragLower(tbf, tSeqName, range->tStart, range->tEnd);
    if (isRc)
	{
	reverseComplement(target->dna, target->size);
	reverseIntRange(&range->tStart, &range->tEnd, tSeqSize);
	}
    *retTotalSeqSize = tSeqSize;
    }
return target;
}
Example #3
0
struct dnaSeq *dnaLoadSingle(char *fileName, int *retStart, int *retEnd, int *retParentSize)
/* Return sequence if it's a nib file or 2bit part, NULL otherwise. */
{
struct dnaSeq *seq = NULL;
unsigned start = 0, end = 0;
int parentSize = 0;
if (nibIsFile(fileName))
    {
    /* Save offset out of fileName for auto-lifting */
    char filePath[PATH_LEN];
    char name[PATH_LEN];
    nibParseName(0, fileName, filePath, name, &start, &end);

    if (end != 0)	/* It's just a range. */
        {
	FILE *f;
	int size;
	nibOpenVerify(filePath, &f, &size);
	parentSize = size;
	}
    seq =  nibLoadAllMasked(NIB_MASK_MIXED, fileName);
    if (end == 0)
         parentSize = end = seq->size;
    freez(&seq->name);
    seq->name = cloneString(name);
    }
else if (twoBitIsRange(fileName))
    {
    /* Save offset out of fileName for auto-lifting */
    char *rangeSpec = cloneString(fileName);
    int start, end;
    char *file, *seqName;
    twoBitParseRange(rangeSpec, &file, &seqName, &start, &end);

    /* Load sequence. */
        {
	struct twoBitFile *tbf = twoBitOpen(file);
	parentSize = twoBitSeqSize(tbf, seqName);
	seq = twoBitReadSeqFrag(tbf, seqName, start, end);
	twoBitClose(&tbf);
	}
    if (end == 0)
        end = seq->size;
    freez(&rangeSpec);
    }
if (retStart != NULL)
    *retStart = start;
if (retEnd != NULL)
    *retEnd = end;
if (retParentSize != NULL)
    *retParentSize = parentSize;
return seq;
}
Example #4
0
static void gfFileCacheFreeEl(struct hashEl *el)
/* Free up one file cache info. */
{
char *name = el->name;
if (nibIsFile(name))
    {
    struct nibInfo *nib = el->val;
    nibInfoFree(&nib);
    }
else
    {
    struct twoBitFile *tbf = el->val;
    twoBitClose(&tbf);
    }
el->val = NULL;
}
Example #5
0
struct hash *loadGeno(char *genoFile)
/* load genome sequences into a hash.  This supports the multi-sequence
 * specs of twoBitLoadAll */
{
struct dnaSeq *genos = NULL, *geno;
struct hash *genoHash = hashNew(0);

if (nibIsFile(genoFile))
    genos = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, genoFile);
else if (twoBitIsSpec(genoFile))
    genos = twoBitLoadAll(genoFile);
else
    genos = faReadDna(genoFile);

while ((geno = slPopHead(&genos)) != NULL)
    {
    tolowers(geno->dna);
    hashAdd(genoHash, geno->name, geno);
    }
return genoHash;
}
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName, 
	boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index. */
{
int i;
char *fileName;
int count = 0; 
long long totalSize = 0;

gfOutputHead(gvo, outFile);
for (i=0; i<fileCount; ++i)
    {
    fileName = files[i];
    if (nibIsFile(fileName))
        {
	struct dnaSeq *seq;

	if (isProt)
	    errAbort("%s: Can't use .nib files with -prot or d=prot option\n", fileName);
	seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
	freez(&seq->name);
	seq->name = cloneString(fileName);
	searchOneMaskTrim(seq, isProt, gf, outFile,
			  maskHash, &totalSize, &count);
	freeDnaSeq(&seq);
	}
    else if (twoBitIsSpec(fileName))
	{
	struct twoBitSpec *tbs = twoBitSpecNew(fileName);
	struct twoBitFile *tbf = twoBitOpen(tbs->fileName);
	if (isProt)
	    errAbort("%s is a two bit file, which doesn't work for proteins.", 
	    	fileName);
	if (tbs->seqs != NULL)
	    {
	    struct twoBitSeqSpec *ss = NULL;
	    for (ss = tbs->seqs;  ss != NULL;  ss = ss->next)
		{
		struct dnaSeq *seq = twoBitReadSeqFrag(tbf, ss->name,
						       ss->start, ss->end);
		searchOneMaskTrim(seq, isProt, gf, outFile,
				  maskHash, &totalSize, &count);
		dnaSeqFree(&seq);
		}
	    }
	else
	    {
	    struct twoBitIndex *index = NULL;
	    for (index = tbf->indexList; index != NULL; index = index->next)
		{
		struct dnaSeq *seq = twoBitReadSeqFrag(tbf, index->name, 0, 0);
		searchOneMaskTrim(seq, isProt, gf, outFile,
				  maskHash, &totalSize, &count);
		dnaSeqFree(&seq);
		}
	    }
	twoBitClose(&tbf);
	}
    else
        {
	static struct dnaSeq seq;
	struct lineFile *lf = lineFileOpen(fileName, TRUE);
	while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
	    {
	    searchOneMaskTrim(&seq, isProt, gf, outFile,
			      maskHash, &totalSize, &count);
	    }
	lineFileClose(&lf);
	}
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", totalSize, count);
}