boolean wormParseChromRange(char *in, char **retChromId, int *retStart, int *retEnd) /* Chop up a string representation of a range within a chromosome and put the * pieces into the return variables. Return FALSE if it isn't formatted right. */ { char *words[5]; int wordCount; char *chromId; char buf[128]; strncpy(buf, in, sizeof(buf)); wordCount = chopString(buf, "- \t\r\n:", words, ArraySize(words)); if (wordCount != 3) return FALSE; chromId = wormOfficialChromName(words[0]); if (chromId == NULL) return FALSE; if (!isdigit(words[1][0]) || !isdigit(words[2][0])) return FALSE; *retChromId = chromId; *retStart = atoi(words[1]); *retEnd = atoi(words[2]); wormClipRangeToChrom(chromId, retStart, retEnd); return TRUE; }
void showClump(struct ernaClump *clump, FILE *f) /* Show detailed alignment for one clump. */ { int chromStart = clump->start - 1000; int chromEnd = clump->end + 1000; int chromSize; DNA *chromDna; struct wormFeature *cdnaNameList, *cdnaName; struct lineAli *laList = NULL, *la; struct ffAli *ali; struct dnaSeq *cdna; boolean rcCdna; int clumpSize = clump->end - clump->start + 1; int displaySize = lineSize; int displayStart = (clump->start+clump->end)/2 - displaySize/2; int displayEnd = displayStart + displaySize; int displayDnaOffset; DNA *displayDna; struct ernaHit *hit; /* Get genomic dna and list of all cDNAs in area around clump. */ wormClipRangeToChrom(clump->chrom, &chromStart, &chromEnd); chromSize = chromEnd - chromStart; chromDna = wormChromPart(clump->chrom, chromStart, chromSize); cdnaNameList = wormCdnasInRange(clump->chrom, chromStart, chromEnd); /* Figure out 60 bases to display alignment around clump. */ wormClipRangeToChrom(clump->chrom, &displayStart, &displayEnd); displaySize = displayEnd - displayStart; displayDnaOffset = displayStart - chromStart; displayDna = chromDna + displayDnaOffset; /* Make up detailed alignment on each cDNA */ for (cdnaName = cdnaNameList; cdnaName != NULL; cdnaName = cdnaName->next) { struct wormCdnaInfo info; if (!wormCdnaSeq(cdnaName->name, &cdna, &info)) { warn("Couldn't find %s", cdnaName->name); continue; } if (!ffFindEitherStrandN(cdna->dna, cdna->size, chromDna, chromSize, ffCdna, &ali, &rcCdna)) { warn("Couldn't align %s", cdnaName->name); continue; } if (rcCdna) reverseComplement(cdna->dna, cdna->size); la = makeLineAli(cdnaName->name, ali, chromDna, cdna->dna, displayDnaOffset); la->isEmbryo = info.isEmbryonic; slAddHead(&laList, la); freeDnaSeq(&cdna); ffFreeAli(&ali); } /* Display genomic with upper case at hot spots*/ displayDna[displaySize] = 0; for (hit = clump->hits; hit != NULL; hit = hit->next) { int doff = hit->pos - chromStart; chromDna[doff] = toupper(chromDna[doff]); } fprintf(f, "%s Genomic\n", displayDna); /* Display aligned list by sorted score. */ slSort(&laList, cmpLaScore); for (la = laList; la != NULL; la = la->next) { if (spaceCount(la->line) != lineSize) fprintf(f, "%s %s %s\n", la->line, la->name, (la->isEmbryo ? "emb" : " ")); } /* Clean up. */ slFreeList(&cdnaNameList); slFreeList(&laList); freeMem(chromDna); }
int main(int argc, char *argv[]) { #define stepSize 10000 #define extraBases 1000 static struct noiseTrack noiseTrack[stepSize]; int chromIx; int chromSize; int baseOff; char *chromName; int dnaStart, dnaEnd; char *outName; FILE *out; struct hash *dupeHash; if (argc != 2) { errAbort("editbase - lists bases for which there is evidence of RNA editing\n" "usage:\n" " editbase outfile.txt"); } dnaUtilOpen(); initVlookup(); outName = argv[1]; out = mustOpen(outName, "w"); printf("Scanning for cDNAs that align more than once.\n"); dupeHash = buildMultiAlignHash(); printf("Loading worm genome\n"); wormLoadNt4Genome(&chrom, &chromCount); wormChromNames(&chromNames, &chromCount); for (chromIx = 0; chromIx < chromCount; ++chromIx) { chromName = chromNames[chromIx]; printf("Processing chromosome %s\n", chromName); chromSize = wormChromSize(chromName); for (baseOff = 0; baseOff < chromSize; baseOff += stepSize) { struct wormFeature *cdnaNamesList, *name; struct cdnaAli *caList = NULL, *ca; int dnaSize; DNA *dna; int chunkSize; DNA *chunk; int i; /* Figure out how much DNA to get and get it. Include some * extra around chunk so can align better. */ chunkSize = chromSize - baseOff; if (chunkSize > stepSize) chunkSize = stepSize; dnaStart = baseOff - extraBases; dnaEnd = baseOff + stepSize + extraBases; wormClipRangeToChrom(chromName, &dnaStart, &dnaEnd); dnaSize = dnaEnd - dnaStart; dna = wormChromPart(chromName, dnaStart, dnaSize); /* Get the cDNAs */ cdnaNamesList = wormCdnasInRange(chromName, baseOff, baseOff + chunkSize); for (name = cdnaNamesList; name != NULL; name = name->next) { if (!hashLookup(dupeHash, name->name) ) { ca = makeCdnaAli(name->name, dna, dnaSize); slAddHead(&caList, ca); } } slReverse(&caList); /* Add cdnas to noise track. */ chunk = dna + baseOff - dnaStart; for (ca = caList; ca != NULL; ca = ca->next) { addNoiseTrack(noiseTrack, chunk, chunkSize, ca); } /* Step through base by base evaluating noise and reporting it if * it's interesting. */ for (i=0; i<chunkSize; ++i) { struct noiseTrack *nt = &noiseTrack[i]; struct noise *noise = nt->noise; int noiseCount = slCount(noise); if (noiseCount > 1) { char commonVal; int commonCount; findCommon(noise, &commonVal, &commonCount); if (commonCount*2 > noiseCount && commonVal != 'n') { double ratio = (double)commonCount/noiseCount; double score; ratio = ratio * ratio * ratio; score = ratio * commonCount; if (score >= 4.0) { fprintf(stdout, "%f %s:%d %c->%c in %d out of %d out of %d %s\n", ratio*commonCount, chromName, i+baseOff+1, chunk[i], commonVal, commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name); fprintf(out, "%f %s:%d %c->%c in %d out of %d out of %d %s\n", ratio*ratio*commonCount, chromName, i+baseOff+1, chunk[i], commonVal, commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name); } } } } freeCdnaAliList(&caList); slFreeList(&cdnaNamesList); freez(&dna); recycleNoiseTrack(noiseTrack, chunkSize); printf("%s %d maxNoise %d\n", chromName, baseOff, slCount(freeNoiseList)); } } return 0; }