Beispiel #1
0
void twoBitToFa(char *inName, char *outName)
/* twoBitToFa - Convert all or part of twoBit file to fasta.
 * inName is the twoBit file (or spec), outName the file that is opened
 * for writing.  What gets written is steered by command line globals:
 *   clSeq, clStart, clEnd - turned into a "file:seq[:start-end]" spec
 *   clSeqList             - passed to twoBitSpecNewFile with inName
 *   clBpt                 - passed to twoBitOpenExternalBptIndex, but only
 *                           when the spec names specific sequences
 *   clBed                 - when set, output is produced by
 *                           processSeqsFromBed instead of from the spec
 * Aborts if no spec can be built from the input name. */
{
FILE *faOut = mustOpen(outName, "w");
struct twoBitSpec *spec = NULL;
struct twoBitFile *twoBit = NULL;

/* Work out the spec that says which sequences are wanted. */
if (clSeq == NULL)
    {
    if (clSeqList != NULL)
        spec = twoBitSpecNewFile(inName, clSeqList);
    else
        spec = twoBitSpecNew(inName);
    }
else
    {
    char specText[2*PATH_LEN];
    if (clEnd <= clStart)
        {
        /* No usable range given, ask for the sequence by name only. */
        safef(specText, sizeof(specText), "%s:%s", inName, clSeq);
        }
    else
        {
        safef(specText, sizeof(specText), "%s:%s:%d-%d",
              inName, clSeq, clStart, clEnd);
        }
    spec = twoBitSpecNew(specText);
    }

if (spec == NULL)
    errAbort("%s is not a twoBit file", inName);

/* The external index is only used when specific sequences were named. */
if (clBpt == NULL || spec->seqs == NULL)
    twoBit = twoBitOpen(spec->fileName);
else
    twoBit = twoBitOpenExternalBptIndex(spec->fileName, clBpt);

/* Exactly one of the three writers runs. */
if (clBed != NULL)
    processSeqsFromBed(twoBit, clBed, faOut);
else if (spec->seqs == NULL)
    processAllSeqs(twoBit, faOut);
else
    processSeqSpecs(twoBit, spec->seqs, faOut);

twoBitSpecFree(&spec);
carefulClose(&faOut);
twoBitClose(&twoBit);
}
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName, 
	boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index.
 *   fileCount, files - the query inputs; each one is handled as a .nib
 *                      file, a twoBit spec, or (otherwise) is read with
 *                      the fasta reader
 *   gf               - index handed to searchOneMaskTrim for every query
 *   outName          - not referenced by this function
 *   isProt           - passed through to searchOneMaskTrim; .nib and
 *                      twoBit inputs abort when it is set
 *   maskHash         - passed through to searchOneMaskTrim
 *   outFile          - receives the output header and the search results;
 *                      it is closed here before returning
 *   showStatus       - when set, a one line summary goes to stdout */
{
int i;
char *fileName;
int count = 0;              /* Updated by searchOneMaskTrim via pointer. */
long long totalSize = 0;    /* Updated by searchOneMaskTrim via pointer. */

gfOutputHead(gvo, outFile);
for (i=0; i<fileCount; ++i)
    {
    fileName = files[i];
    if (nibIsFile(fileName))
        {
        struct dnaSeq *seq;

        if (isProt)
            errAbort("%s: Can't use .nib files with -prot or d=prot option\n", fileName);
        seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
        /* Replace whatever name the loader assigned with the file name. */
        freez(&seq->name);
        seq->name = cloneString(fileName);
        searchOneMaskTrim(seq, isProt, gf, outFile,
                          maskHash, &totalSize, &count);
        freeDnaSeq(&seq);
        }
    else if (twoBitIsSpec(fileName))
        {
        struct twoBitSpec *tbs = twoBitSpecNew(fileName);
        struct twoBitFile *tbf = twoBitOpen(tbs->fileName);
        if (isProt)
            errAbort("%s is a two bit file, which doesn't work for proteins.", 
                fileName);
        if (tbs->seqs != NULL)
            {
            /* Spec names particular sequences/ranges: search just those. */
            struct twoBitSeqSpec *ss = NULL;
            for (ss = tbs->seqs;  ss != NULL;  ss = ss->next)
                {
                struct dnaSeq *seq = twoBitReadSeqFrag(tbf, ss->name,
                                                       ss->start, ss->end);
                searchOneMaskTrim(seq, isProt, gf, outFile,
                                  maskHash, &totalSize, &count);
                dnaSeqFree(&seq);
                }
            }
        else
            {
            /* No sequences named: walk every entry in the file's index. */
            struct twoBitIndex *index = NULL;
            for (index = tbf->indexList; index != NULL; index = index->next)
                {
                struct dnaSeq *seq = twoBitReadSeqFrag(tbf, index->name, 0, 0);
                searchOneMaskTrim(seq, isProt, gf, outFile,
                                  maskHash, &totalSize, &count);
                dnaSeqFree(&seq);
                }
            }
        twoBitClose(&tbf);
        /* Release the parsed spec too; without this one spec is leaked
         * for every twoBit input file. */
        twoBitSpecFree(&tbs);
        }
    else
        {
        /* Static, so the fields the reader does not fill in start as zero.
         * The reader supplies dna, size and name for each record; nothing
         * is freed per record here (buffers presumably belong to the
         * reader - confirm against faMixedSpeedReadNext). */
        static struct dnaSeq seq;
        struct lineFile *lf = lineFileOpen(fileName, TRUE);
        while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
            {
            searchOneMaskTrim(&seq, isProt, gf, outFile,
                              maskHash, &totalSize, &count);
            }
        lineFileClose(&lf);
        }
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", totalSize, count);
}