void snpMaskGenes(char *nibFile, char *outFile) /* snpMaskGenes - Print gene sequence, exons only, using IUPAC codes for single base substitutions. */ { struct genePred *genes = NULL; struct genePred *gene = NULL; struct dnaSeq *seq; char *ptr; struct snpSimple *snps = NULL; struct snpSimple *snp = NULL; int snpPos = 0; int size = 0; FILE *fileHandle = mustOpen(outFile, "w"); genes = readGenes(chromName); for (gene = genes; gene != NULL; gene = gene->next) { verbose(4, "gene = %s\n", gene->name); snps = readSnpsFromGene(gene, chromName); size = gene->txEnd - gene->txStart; assert(size > 0); AllocVar(seq); seq->dna = needLargeMem(size+1); seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gene->txStart, size); ptr = seq->dna; /* do substitutions */ /* including introns; doesn't take much time, keeps code clean */ for (snp = snps; snp != NULL; snp = snp->next) { snpPos = snp->chromStart - gene->txStart; assert(snpPos >= 0); verbose(5, "before substitution %c\n", ptr[snpPos]); ptr[snpPos] = iupac(snp->name, snp->observed, ptr[snpPos]); verbose(5, "after substitution %c\n", ptr[snpPos]); } printExons(gene, seq, fileHandle); snpSimpleFreeList(&snps); dnaSeqFree(&seq); } geneFreeList(&genes); if (fclose(fileHandle) != 0) errnoAbort("fclose failed"); }
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize,
	struct hash *qChromHash, char *nibDir,
	struct chain *chain, boolean swap)
/* Convert the subset of chain defined by fill to a list of axt.
 * Query and target are swapped in every returned axt if swap is true.
 *
 * The nibInfo for fill->qName is looked up in qChromHash; when it is
 * missing, one is created for "<nibDir>/<qName>.nib", opened with
 * nibOpenVerify and added to the hash.
 * Returns NULL when chainSubsetOnT yields no sub-chain. */
{
struct nibInfo *qNib = hashFindVal(qChromHash, fill->qName);
boolean onMinus = (chain->qStrand == '-');
struct dnaSeq *querySeq = NULL;
struct chain *clipped = NULL, *toFree = NULL;
struct axt *result = NULL, *el = NULL;
int queryOffset = 0;

/* Make sure we have nib info for the query sequence. */
if (qNib == NULL)
    {
    char nibPath[512];
    AllocVar(qNib);
    safef(nibPath, sizeof(nibPath), "%s/%s.nib", nibDir, fill->qName);
    qNib->fileName = cloneString(nibPath);
    nibOpenVerify(nibPath, &qNib->f, &qNib->size);
    hashAdd(qChromHash, fill->qName, qNib);
    }

/* Get query sequence fragment. */
querySeq = nibLoadPartMasked(NIB_MASK_MIXED, qNib->fileName,
	fill->qStart, fill->qSize);
queryOffset = fill->qStart;
if (onMinus)
    {
    /* Minus strand: flip the fragment and express its offset
     * relative to the other end of the query sequence. */
    reverseComplement(querySeq->dna, querySeq->size);
    queryOffset = qNib->size - (fill->qStart + fill->qSize);
    }

/* Restrict the chain to the target range covered by fill. */
chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize,
	&clipped, &toFree);
if (clipped != NULL)
    {
    result = chainToAxt(clipped, querySeq, queryOffset, tChrom,
	    fill->tStart, 100, BIGNUM);
    if (swap)
	{
	el = result;
	while (el != NULL)
	    {
	    axtSwap(el, tSize, qNib->size);
	    el = el->next;
	    }
	}
    }

chainFree(&toFree);
freeDnaSeq(&querySeq);
return result;
}
void createFastaFilesForBits(char *root, struct genomeBit *gbList, boolean addDummy)
/* Load all of the fasta records for the bits in the genome list into one
 * fasta file. Uses .nib files as they are much more compact and allow
 * random access.
 *
 * root     - passed to nibFileFromChrom to locate each chromosome's nib file.
 * gbList   - list of genome bits to write; must not be NULL.
 * addDummy - if true, append a ten-base "garbage" record of n's.
 *
 * The output file name is built by fileNameFromGenomeBit from outputRoot
 * (not a parameter; defined elsewhere in the file), ".fa" and gbList. */
{
struct genomeBit *gb = NULL;
char *faFile = NULL;
FILE *faOut = NULL;

assert(gbList);
faFile = fileNameFromGenomeBit(outputRoot, ".fa", gbList);
faOut = mustOpen(faFile, "w");

for (gb = gbList; gb != NULL; gb = gb->next)
    {
    char buff[256];
    char *nibFile = NULL;
    struct dnaSeq *seq = NULL;

    /* Record name is "chrom:start-end". */
    snprintf(buff, sizeof(buff), "%s:%u-%u", gb->chrom, gb->chromStart, gb->chromEnd);
    nibFile = nibFileFromChrom(root, gb->chrom);
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gb->chromStart,
	    gb->chromEnd - gb->chromStart);
    /* The sequence length is taken from seq->size; no running base
     * total is kept because nothing in this function consumes one. */
    faWriteNext(faOut, buff, seq->dna, seq->size);
    dnaSeqFree(&seq);
    freez(&nibFile);
    }

/* Add a dummy fasta record so that avid will order and orient things for us.. */
if (addDummy)
    faWriteNext(faOut, "garbage", "nnnnnnnnnn", 10);
carefulClose(&faOut);

/* Repeat-mask file generation (repeatMaskFile / fakeRepeatMaskFile on
 * outputRoot, gbList) used to happen here; it is disabled as we are now
 * using nnnn's as repeat masking. */
freez(&faFile);
}