boolean getWormGeneDna(char *name, DNA **retDna, boolean upcExons)
/* Fetch the DNA spanning a gene into *retDna.  When upcExons is set the
 * exon ranges of that DNA are upper cased.  Returns FALSE (leaving *retDna
 * untouched) if the gene can't be found in the selected cache, TRUE
 * otherwise. */
{
struct slName *orfNames = NULL;
struct wormGdfCache *cache;
struct gdfGene *gene;
long extStart, extEnd;
int geneStart, geneEnd, size;
DNA *seq;

/* A biologist-style gene name is swapped for the first ORF name that
 * wormGeneToOrfNames reports for it, if there is one.
 * NOTE(review): the orfNames list is never released in this function -
 * confirm whether the caller or a cache owns it. */
if (wormIsGeneName(name))
    {
    orfNames = wormGeneToOrfNames(name);
    if (orfNames != NULL)
        name = orfNames->name;
    }

/* Names beginning with "g-" are looked up in the Genie cache, everything
 * else in the Sanger cache. */
cache = (strncmp(name, "g-", 2) == 0) ? &wormGenieGdfCache : &wormSangerGdfCache;

gene = wormGetSomeGdfGene(name, cache);
if (gene == NULL)
    return FALSE;

/* Work out the span of the gene.  The range is not clipped to the
 * chromosome here. */
gdfGeneExtents(gene, &extStart, &extEnd);
geneStart = extStart;
geneEnd = extEnd;
size = geneEnd - geneStart;

seq = wormChromPart(chromIds[gene->chromIx], geneStart, size);
*retDna = seq;

/* Shift the gene's coordinates so they index into seq, flipping both the
 * sequence and the coordinates for minus-strand genes. */
gdfOffsetGene(gene, -geneStart);
if (gene->strand == '-')
    {
    reverseComplement(seq, size);
    gdfRcGene(gene, size);
    }

if (upcExons)
    {
    /* Data points are consumed in pairs: the first of each pair gives the
     * start of the range to upper case, the second gives its end. */
    struct gdfDataPoint *points = gene->dataPoints;
    int ix;
    for (ix = 0; ix < gene->dataCount; ix += 2)
        {
        struct gdfDataPoint *first = &points[ix];
        struct gdfDataPoint *second = &points[ix+1];
        toUpperN(seq + first->start, second->start - first->start);
        }
    }

gdfFreeGene(gene);
return TRUE;
}
boolean getWormGeneExonDna(char *name, DNA **retDna)
/* Get the DNA associated with a gene, without introns.
 * On success *retDna is set to a cloneString() copy of the concatenated
 * exon ranges and TRUE is returned.  If the gene is not found in the
 * selected cache, FALSE is returned and *retDna is left untouched. */
{
struct gdfGene *g;
struct slName *syn = NULL;
long lstart, lend;
int start, end;
int dnaSize;
DNA *dna;
int i;
struct gdfDataPoint *pt = NULL;
struct wormGdfCache *gdfCache;
struct dyString *dy = NULL;

/* Translate biologist type name to cosmid.N name.
 * NOTE(review): the syn list is never released in this function -
 * confirm who owns it. */
if (wormIsGeneName(name))
    {
    syn = wormGeneToOrfNames(name);
    if (syn != NULL)
        name = syn->name;
    }

/* Names beginning with "g-" come from the Genie cache, all others from
 * the Sanger cache. */
if (strncmp(name, "g-", 2) == 0)
    gdfCache = &wormGenieGdfCache;
else
    gdfCache = &wormSangerGdfCache;
if ((g = wormGetSomeGdfGene(name, gdfCache)) == NULL)
    return FALSE;

/* Allocate the accumulation buffer only once the gene is known to exist,
 * so the not-found return above has nothing to clean up. */
dy = newDyString(1000);

/* Fetch the DNA for the full extent of the gene (range is not clipped to
 * the chromosome). */
gdfGeneExtents(g, &lstart, &lend);
start = lstart;
end = lend;
dnaSize = end-start;
dna = wormChromPart(chromIds[g->chromIx], start, dnaSize);

/* Make the gene's coordinates relative to dna, flipping both for
 * minus-strand genes. */
gdfOffsetGene(g, -start);
if (g->strand == '-')
    {
    reverseComplement(dna, dnaSize);
    gdfRcGene(g, dnaSize);
    }

/* Data points are consumed in pairs: start of a range, then its end.
 * Append each range to dy so the ranges in between are dropped. */
pt = g->dataPoints;
for (i=0; i<g->dataCount; i += 2)
    {
    dyStringAppendN(dy, (dna+pt[i].start), (pt[i+1].start - pt[i].start));
    }
*retDna = cloneString(dy->string);

/* NOTE(review): dna returned by wormChromPart is not released here;
 * confirm its ownership and free it if this function owns it. */
dyStringFree(&dy);
gdfFreeGene(g);
return TRUE;
}
void gdfUpcExons(struct gdfGene *gene, int geneOffset, DNA *dna, int dnaSize, int dnaOffset)
/* Upper case the parts of dna covered by the gene's exon ranges.
 * Each pair of data points is shifted by (-geneExtentStart + geneOffset -
 * dnaOffset), clamped to [0, dnaSize), and the resulting range is upper
 * cased.  Ranges falling entirely outside the buffer are skipped. */
{
struct gdfDataPoint *points = gene->dataPoints;
int pointCount = gene->dataCount;
long extentStart, extentEnd;
int shift;
int ix;

gdfGeneExtents(gene, &extentStart, &extentEnd);
shift = -extentStart + geneOffset - dnaOffset;

for (ix = 0; ix < pointCount; ix += 2)
    {
    int from = points[ix].start + shift;
    int to = points[ix+1].start + shift;

    /* Only ranges that overlap the buffer are processed. */
    if (to > 0 && from < dnaSize)
        {
        /* Trim the range to the part that lies inside dna. */
        if (from < 0)
            from = 0;
        if (to > dnaSize)
            to = dnaSize;
        toUpperN(dna + from, to - from);
        }
    }
}