Ejemplo n.º 1
0
void snpMaskChrom(char *tableName, char *nibFile, char *outFile)
/* snpMaskChrom - Print a nib file as a fasta file, using IUPAC codes for single base substitutions. */
{
struct dnaSeq *seq;
char *ptr;
struct snpSimple *snps = NULL;
struct snpSimple *snp = NULL;
boolean inRep = FALSE;

seq = nibLoadAllMasked(NIB_MASK_MIXED, nibFile);
ptr = seq->dna;
snps = readSnpsFromChrom(tableName, chromName);

/* do all substitutions */

for (snp = snps; snp != NULL; snp = snp->next)
    {
    if (islower(ptr[snp->chromStart])) inRep = TRUE;
    else inRep = FALSE;
    ptr[snp->chromStart] = iupac(snp->name, snp->observed, ptr[snp->chromStart]);
    if (inRep)
        ptr[snp->chromStart] = tolower(ptr[snp->chromStart]);
    }

faWrite(outFile, chromName, seq->dna, seq->size);
snpSimpleFreeList(&snps);
dnaSeqFree(&seq);  

}
void loadIfNewSeq(char *nibDir, char *newName, char strand, 
	char **pName, struct dnaSeq **pSeq, char *pStrand)
/* Load sequence unless it is already loaded.  Reverse complement
 * if necessary. */
{
struct dnaSeq *seq;
if (sameString(newName, *pName))
    {
    if (strand != *pStrand)
        {
	seq = *pSeq;
	reverseComplement(seq->dna, seq->size);
	*pStrand = strand;
	}
    }
else
    {
    char fileName[512];
    freeDnaSeq(pSeq);
    snprintf(fileName, sizeof(fileName), "%s/%s.nib", nibDir, newName);
    *pName = newName;
    *pSeq = seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
    *pStrand = strand;
    if (strand == '-')
        reverseComplement(seq->dna, seq->size);
    uglyf("Loaded %d bases in %s\n", seq->size, fileName);
    }
}
Ejemplo n.º 3
0
struct dnaSeq *dnaLoadSingle(char *fileName, int *retStart, int *retEnd, int *retParentSize)
/* Return sequence if it's a nib file or 2bit part, NULL otherwise. */
{
struct dnaSeq *seq = NULL;
unsigned start = 0, end = 0;
int parentSize = 0;
if (nibIsFile(fileName))
    {
    /* Save offset out of fileName for auto-lifting */
    char filePath[PATH_LEN];
    char name[PATH_LEN];
    nibParseName(0, fileName, filePath, name, &start, &end);

    if (end != 0)	/* It's just a range. */
        {
	FILE *f;
	int size;
	nibOpenVerify(filePath, &f, &size);
	parentSize = size;
	}
    seq =  nibLoadAllMasked(NIB_MASK_MIXED, fileName);
    if (end == 0)
         parentSize = end = seq->size;
    freez(&seq->name);
    seq->name = cloneString(name);
    }
else if (twoBitIsRange(fileName))
    {
    /* Save offset out of fileName for auto-lifting */
    char *rangeSpec = cloneString(fileName);
    int start, end;
    char *file, *seqName;
    twoBitParseRange(rangeSpec, &file, &seqName, &start, &end);

    /* Load sequence. */
        {
	struct twoBitFile *tbf = twoBitOpen(file);
	parentSize = twoBitSeqSize(tbf, seqName);
	seq = twoBitReadSeqFrag(tbf, seqName, start, end);
	twoBitClose(&tbf);
	}
    if (end == 0)
        end = seq->size;
    freez(&rangeSpec);
    }
if (retStart != NULL)
    *retStart = start;
if (retEnd != NULL)
    *retEnd = end;
if (retParentSize != NULL)
    *retParentSize = parentSize;
return seq;
}
Ejemplo n.º 4
0
struct dnaSeq *nibTwoLoadOne(char *pathName, char *seqName)
/* Return sequence from a directory full of nibs or a .2bit file. 
 * The sequence will have repeats in lower case. */
{
struct dnaSeq *seq;
if (twoBitIsFile(pathName))
    {
    struct twoBitFile *tbf = twoBitOpen(pathName);
    seq = twoBitReadSeqFrag(tbf, seqName, 0, 0);
    twoBitClose(&tbf);
    }
else
    {
    char path[512];
    safef(path, sizeof(path), "%s/%s.nib", pathName, seqName);
    seq = nibLoadAllMasked(NIB_MASK_MIXED, path);
    }
return seq;
}
Ejemplo n.º 5
0
struct hash *loadGeno(char *genoFile)
/* load genome sequences into a hash.  This supports the multi-sequence
 * specs of twoBitLoadAll */
{
struct dnaSeq *genos = NULL, *geno;
struct hash *genoHash = hashNew(0);

if (nibIsFile(genoFile))
    genos = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, genoFile);
else if (twoBitIsSpec(genoFile))
    genos = twoBitLoadAll(genoFile);
else
    genos = faReadDna(genoFile);

while ((geno = slPopHead(&genos)) != NULL)
    {
    tolowers(geno->dna);
    hashAdd(genoHash, geno->name, geno);
    }
return genoHash;
}
void loadIfNewSeq(char *seqPath, boolean isTwoBit, char *newName, char strand, 
	char **pName, struct dnaSeq **pSeq, char *pStrand)
/* Load sequence unless it is already loaded.  Reverse complement
 * if necessary. */
{
struct dnaSeq *seq;
if (sameString(newName, *pName))
    {
    if (strand != *pStrand)
        {
	seq = *pSeq;
	reverseComplement(seq->dna, seq->size);
	*pStrand = strand;
	}
    }
else
    {
    char fileName[512];
    freeDnaSeq(pSeq);
    if (isTwoBit)
        {
	struct twoBitFile *tbf = twoBitOpenCached(seqPath);
	*pSeq = seq = twoBitReadSeqFrag(tbf, newName, 0, 0);
	verbose(1, "Loaded %d bases of %s from %s\n", seq->size, newName, seqPath);
	}
    else
	{
	snprintf(fileName, sizeof(fileName), "%s/%s.nib", seqPath, newName);
	*pSeq = seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
	verbose(1, "Loaded %d bases in %s\n", seq->size, fileName);
	}
    *pName = newName;
    *pStrand = strand;
    if (strand == '-')
	reverseComplement(seq->dna, seq->size);
    }
}
Ejemplo n.º 7
0
void snpMask(char *nibFile, char *outFile)
/* snpMask - Print a nib file, using IUPAC codes for single base substitutions. */
{
struct dnaSeq *seq;
char *ptr;
struct snp *snps = NULL;
struct snp *snp = NULL;

seq = nibLoadAllMasked(NIB_MASK_MIXED, nibFile);
ptr = seq->dna;
snps = readSnpsFromChrom(chromName);
printf("got all snps in %s\n", chromName);

/* do all substitutions */
for (snp = snps; snp != NULL; snp = snp->next)
    {
    ptr[snp->chromStart] = iupac(snp->name, snp->observed, ptr[snp->chromStart]);
    }

if (printSnps)
    {
    for (snp = snps; snp != NULL; snp = snp->next)
        {
        printSnpSeq(snp, seq);
        }
    }

if (printChrom)
    faWrite(outFile, chromName, seq->dna, seq->size);

snpFreeList(&snps);

if (printGenes) doPrintGenes(chromName, seq);

dnaSeqFree(&seq);  

}
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName, 
	boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index. */
{
int i;
char *fileName;
int count = 0; 
long long totalSize = 0;

gfOutputHead(gvo, outFile);
for (i=0; i<fileCount; ++i)
    {
    fileName = files[i];
    if (nibIsFile(fileName))
        {
	struct dnaSeq *seq;

	if (isProt)
	    errAbort("%s: Can't use .nib files with -prot or d=prot option\n", fileName);
	seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
	freez(&seq->name);
	seq->name = cloneString(fileName);
	searchOneMaskTrim(seq, isProt, gf, outFile,
			  maskHash, &totalSize, &count);
	freeDnaSeq(&seq);
	}
    else if (twoBitIsSpec(fileName))
	{
	struct twoBitSpec *tbs = twoBitSpecNew(fileName);
	struct twoBitFile *tbf = twoBitOpen(tbs->fileName);
	if (isProt)
	    errAbort("%s is a two bit file, which doesn't work for proteins.", 
	    	fileName);
	if (tbs->seqs != NULL)
	    {
	    struct twoBitSeqSpec *ss = NULL;
	    for (ss = tbs->seqs;  ss != NULL;  ss = ss->next)
		{
		struct dnaSeq *seq = twoBitReadSeqFrag(tbf, ss->name,
						       ss->start, ss->end);
		searchOneMaskTrim(seq, isProt, gf, outFile,
				  maskHash, &totalSize, &count);
		dnaSeqFree(&seq);
		}
	    }
	else
	    {
	    struct twoBitIndex *index = NULL;
	    for (index = tbf->indexList; index != NULL; index = index->next)
		{
		struct dnaSeq *seq = twoBitReadSeqFrag(tbf, index->name, 0, 0);
		searchOneMaskTrim(seq, isProt, gf, outFile,
				  maskHash, &totalSize, &count);
		dnaSeqFree(&seq);
		}
	    }
	twoBitClose(&tbf);
	}
    else
        {
	static struct dnaSeq seq;
	struct lineFile *lf = lineFileOpen(fileName, TRUE);
	while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
	    {
	    searchOneMaskTrim(&seq, isProt, gf, outFile,
			      maskHash, &totalSize, &count);
	    }
	lineFileClose(&lf);
	}
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", totalSize, count);
}