Exemple #1
0
struct hash *loadGeno(char *genoFile)
/* load genome sequences into a hash.  This supports the multi-sequence
 * specs of twoBitLoadAll */
{
struct dnaSeq *genos = NULL, *geno;
struct hash *genoHash = hashNew(0);

if (nibIsFile(genoFile))
    genos = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, genoFile);
else if (twoBitIsSpec(genoFile))
    genos = twoBitLoadAll(genoFile);
else
    genos = faReadDna(genoFile);

while ((geno = slPopHead(&genos)) != NULL)
    {
    tolowers(geno->dna);
    hashAdd(genoHash, geno->name, geno);
    }
return genoHash;
}
void twoBitMask(char *inName, char *maskName, char *outName)
/* twoBitMask - apply masking to a .2bit file, creating a new .2bit file. */
{
    struct hash *tbHash = hashNew(20);
    struct hash *bitmapHash = hashNew(20);
    struct twoBit *twoBitList = NULL;
    struct twoBit *twoBit = NULL;
    FILE *f = NULL;

    if (! twoBitIsFile(inName))
    {
        if (twoBitIsSpec(inName))
            errAbort("Sorry, this works only on whole .2bit files, not specs.");
        else
            errAbort("Input %s does not look like a proper .2bit file.", inName);
    }

    twoBitList = slurpInput(inName, tbHash, bitmapHash);

    /* Read mask data into bitmapHash, store it in twoBits: */
    if ((type && endsWith(type, "bed")) || endsWith(maskName, ".bed"))
        maskWithBed(maskName, tbHash, bitmapHash);
    else if ((type && endsWith(type, "out")) || endsWith(maskName, ".out"))
        maskWithOut(maskName, tbHash, bitmapHash);
    else
        errAbort("Sorry, maskFile must end in \".bed\" or \".out\".");

    /* Create a new .2bit file, write it out from twoBits. */
    f = mustOpen(outName, "wb");
    twoBitWriteHeader(twoBitList, f);
    for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
    {
        twoBitWriteOne(twoBit, f);
    }
    carefulClose(&f);

    /* Don't bother freeing twoBitList and hashes here -- just exit. */
}
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName, 
	boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index. */
{
int i;
char *fileName;
int count = 0; 
long long totalSize = 0;

gfOutputHead(gvo, outFile);
for (i=0; i<fileCount; ++i)
    {
    fileName = files[i];
    if (nibIsFile(fileName))
        {
	struct dnaSeq *seq;

	if (isProt)
	    errAbort("%s: Can't use .nib files with -prot or d=prot option\n", fileName);
	seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
	freez(&seq->name);
	seq->name = cloneString(fileName);
	searchOneMaskTrim(seq, isProt, gf, outFile,
			  maskHash, &totalSize, &count);
	freeDnaSeq(&seq);
	}
    else if (twoBitIsSpec(fileName))
	{
	struct twoBitSpec *tbs = twoBitSpecNew(fileName);
	struct twoBitFile *tbf = twoBitOpen(tbs->fileName);
	if (isProt)
	    errAbort("%s is a two bit file, which doesn't work for proteins.", 
	    	fileName);
	if (tbs->seqs != NULL)
	    {
	    struct twoBitSeqSpec *ss = NULL;
	    for (ss = tbs->seqs;  ss != NULL;  ss = ss->next)
		{
		struct dnaSeq *seq = twoBitReadSeqFrag(tbf, ss->name,
						       ss->start, ss->end);
		searchOneMaskTrim(seq, isProt, gf, outFile,
				  maskHash, &totalSize, &count);
		dnaSeqFree(&seq);
		}
	    }
	else
	    {
	    struct twoBitIndex *index = NULL;
	    for (index = tbf->indexList; index != NULL; index = index->next)
		{
		struct dnaSeq *seq = twoBitReadSeqFrag(tbf, index->name, 0, 0);
		searchOneMaskTrim(seq, isProt, gf, outFile,
				  maskHash, &totalSize, &count);
		dnaSeqFree(&seq);
		}
	    }
	twoBitClose(&tbf);
	}
    else
        {
	static struct dnaSeq seq;
	struct lineFile *lf = lineFileOpen(fileName, TRUE);
	while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
	    {
	    searchOneMaskTrim(&seq, isProt, gf, outFile,
			      maskHash, &totalSize, &count);
	    }
	lineFileClose(&lf);
	}
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", totalSize, count);
}