Пример #1
0
void snpMaskGenes(char *nibFile, char *outFile)
/* snpMaskGenes - Print gene sequence, exons only,
   using IUPAC codes for single base substitutions.
   nibFile - nib file the sequence of each gene is loaded from.
   outFile - file the exons are written to; opened here for writing and
             closed before returning (aborts if the close fails).
   The genes and their SNPs are read for chromName, which is defined
   outside this function. */
{
struct genePred *genes = NULL;
struct genePred *gene = NULL;
struct dnaSeq *seq = NULL;
char *ptr;
struct snpSimple *snps = NULL;
struct snpSimple *snp = NULL;
int snpPos = 0;
int size = 0;
FILE *fileHandle = mustOpen(outFile, "w");

genes = readGenes(chromName);

for (gene = genes; gene != NULL; gene = gene->next)
    {
    verbose(4, "gene = %s\n", gene->name);

    snps = readSnpsFromGene(gene, chromName);

    size = gene->txEnd - gene->txStart;
    assert(size > 0);
    /* The loader hands back its own dnaSeq (freed at the bottom of the loop),
     * so allocating one beforehand would only leak it on every gene. */
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gene->txStart, size);

    ptr = seq->dna;

    /* do substitutions */
    /* including introns; doesn't take much time, keeps code clean */
    for (snp = snps; snp != NULL; snp = snp->next)
        {
        /* Position of the SNP relative to the start of the loaded sequence. */
        snpPos = snp->chromStart - gene->txStart;
        assert(snpPos >= 0);
        /* Guard the write below against SNPs past the end of the gene. */
        assert(snpPos < size);
        verbose(5, "before substitution %c\n", ptr[snpPos]);
        ptr[snpPos] = iupac(snp->name, snp->observed, ptr[snpPos]);
        verbose(5, "after substitution %c\n", ptr[snpPos]);
        }

    printExons(gene, seq, fileHandle);
    snpSimpleFreeList(&snps);
    dnaSeqFree(&seq);
    }

geneFreeList(&genes);
if (fclose(fileHandle) != 0)
    errnoAbort("fclose failed");
}
Пример #2
0
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize,
	struct hash *qChromHash, char *nibDir,
	struct chain *chain, boolean swap)
/* Turn the part of chain that falls inside fill's target range into a list
 * of axt records and return it (NULL when the subset is empty).
 * The query nib is looked up in qChromHash by fill->qName; when it is not
 * there yet it is opened from nibDir/<qName>.nib and added to the hash.
 * When swap is true every resulting axt is passed through axtSwap. */
{
struct dnaSeq *querySeq;
boolean minusStrand = (chain->qStrand == '-');
struct chain *clipped, *toFree;
int queryOffset;
struct axt *result = NULL, *el;
struct nibInfo *qNib = hashFindVal(qChromHash, fill->qName);

/* First use of this query sequence: open its nib file and remember it. */
if (qNib == NULL)
    {
    char nibPath[512];
    AllocVar(qNib);
    safef(nibPath, sizeof(nibPath), "%s/%s.nib", nibDir, fill->qName);
    qNib->fileName = cloneString(nibPath);
    nibOpenVerify(nibPath, &qNib->f, &qNib->size);
    hashAdd(qChromHash, fill->qName, qNib);
    }

/* Load the query fragment; forward-strand offset is the default. */
querySeq = nibLoadPartMasked(NIB_MASK_MIXED, qNib->fileName,
	fill->qStart, fill->qSize);
queryOffset = fill->qStart;
if (minusStrand)
    {
    /* Minus strand: flip the fragment and measure the offset from the
     * other end of the query sequence. */
    reverseComplement(querySeq->dna, querySeq->size);
    queryOffset = qNib->size - (fill->qStart + fill->qSize);
    }

/* Restrict the chain to the target span covered by fill. */
chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize,
	&clipped, &toFree);
if (clipped != NULL)
    {
    result = chainToAxt(clipped, querySeq, queryOffset, tChrom, fill->tStart, 100, BIGNUM);
    if (swap)
        {
        el = result;
        while (el != NULL)
            {
            axtSwap(el, tSize, qNib->size);
            el = el->next;
            }
        }
    }

chainFree(&toFree);
freeDnaSeq(&querySeq);
return result;
}
void createFastaFilesForBits(char *root, struct genomeBit *gbList, boolean addDummy)
/* Load all of the fasta records for the bits in the genome list into one
 * fasta file. Uses .nib files as they are much more compact and allow random access.
 * root     - passed to nibFileFromChrom to locate the nib file of each chromosome.
 * gbList   - list of genome bits to write; must not be NULL.
 * addDummy - when true a final record named "garbage" holding ten n's is appended.
 * The output file name is built from outputRoot (defined outside this
 * function), the ".fa" suffix and gbList. Each record is named
 * chrom:chromStart-chromEnd. */
{
struct genomeBit *gb = NULL;
FILE *faOut = NULL;
char *faFile = NULL;
assert(gbList);
faFile = fileNameFromGenomeBit(outputRoot, ".fa", gbList);
faOut = mustOpen(faFile, "w");
for(gb = gbList; gb != NULL; gb = gb->next)
    {
    char buff[256];
    char *nibFile = NULL;
    struct dnaSeq *seq = NULL;
    snprintf(buff, sizeof(buff), "%s:%u-%u", gb->chrom, gb->chromStart, gb->chromEnd);
    nibFile = nibFileFromChrom(root, gb->chrom);
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gb->chromStart, gb->chromEnd-gb->chromStart);

    faWriteNext(faOut, buff, seq->dna, seq->size);
    /* Both the sequence and the nib file name are per-record; release them here. */
    dnaSeqFree(&seq);
    freez(&nibFile);
    }
/* Add a dummy fasta record so that avid will order and orient things for us.. */
if(addDummy)
    faWriteNext(faOut, "garbage", "nnnnnnnnnn", 10);
carefulClose(&faOut);
/* No separate repeat-mask file is written here (formerly repeatMaskFile /
 * fakeRepeatMaskFile): n's are now used as repeat masking. */
freez(&faFile);
}