void wormClipRangeToChrom(char *chrom, int *pStart, int *pEnd)
/* Adjust *pStart and *pEnd in place so that the range lies inside chrom.
 * The ends are first put in ascending order.  A range that hangs off
 * either edge of the chromosome is shifted back inside rather than
 * simply cut short; only when the range is longer than the chromosome
 * is the start pinned at zero. */
{
int chromSize = wormChromSize(chrom);
int lo = *pStart;
int hi = *pEnd;

/* Put the ends in ascending order. */
if (hi < lo)
    {
    int swap = lo;
    lo = hi;
    hi = swap;
    }

/* Hanging off the left edge: shift right so the range starts at zero. */
if (lo < 0)
    {
    hi -= lo;
    lo = 0;
    }

/* Hanging off the right edge: shift left so the range stops at the
 * end of the chromosome. */
if (hi > chromSize)
    {
    lo -= hi - chromSize;
    hi = chromSize;
    }

/* The left shift above goes negative when the range is longer than
 * the chromosome; pin the start in that case. */
if (lo < 0)
    lo = 0;

*pStart = lo;
*pEnd = hi;
}
int main(int argc, char *argv[]) { #define stepSize 10000 #define extraBases 1000 static struct noiseTrack noiseTrack[stepSize]; int chromIx; int chromSize; int baseOff; char *chromName; int dnaStart, dnaEnd; char *outName; FILE *out; struct hash *dupeHash; if (argc != 2) { errAbort("editbase - lists bases for which there is evidence of RNA editing\n" "usage:\n" " editbase outfile.txt"); } dnaUtilOpen(); initVlookup(); outName = argv[1]; out = mustOpen(outName, "w"); printf("Scanning for cDNAs that align more than once.\n"); dupeHash = buildMultiAlignHash(); printf("Loading worm genome\n"); wormLoadNt4Genome(&chrom, &chromCount); wormChromNames(&chromNames, &chromCount); for (chromIx = 0; chromIx < chromCount; ++chromIx) { chromName = chromNames[chromIx]; printf("Processing chromosome %s\n", chromName); chromSize = wormChromSize(chromName); for (baseOff = 0; baseOff < chromSize; baseOff += stepSize) { struct wormFeature *cdnaNamesList, *name; struct cdnaAli *caList = NULL, *ca; int dnaSize; DNA *dna; int chunkSize; DNA *chunk; int i; /* Figure out how much DNA to get and get it. Include some * extra around chunk so can align better. */ chunkSize = chromSize - baseOff; if (chunkSize > stepSize) chunkSize = stepSize; dnaStart = baseOff - extraBases; dnaEnd = baseOff + stepSize + extraBases; wormClipRangeToChrom(chromName, &dnaStart, &dnaEnd); dnaSize = dnaEnd - dnaStart; dna = wormChromPart(chromName, dnaStart, dnaSize); /* Get the cDNAs */ cdnaNamesList = wormCdnasInRange(chromName, baseOff, baseOff + chunkSize); for (name = cdnaNamesList; name != NULL; name = name->next) { if (!hashLookup(dupeHash, name->name) ) { ca = makeCdnaAli(name->name, dna, dnaSize); slAddHead(&caList, ca); } } slReverse(&caList); /* Add cdnas to noise track. */ chunk = dna + baseOff - dnaStart; for (ca = caList; ca != NULL; ca = ca->next) { addNoiseTrack(noiseTrack, chunk, chunkSize, ca); } /* Step through base by base evaluating noise and reporting it if * it's interesting. 
*/ for (i=0; i<chunkSize; ++i) { struct noiseTrack *nt = &noiseTrack[i]; struct noise *noise = nt->noise; int noiseCount = slCount(noise); if (noiseCount > 1) { char commonVal; int commonCount; findCommon(noise, &commonVal, &commonCount); if (commonCount*2 > noiseCount && commonVal != 'n') { double ratio = (double)commonCount/noiseCount; double score; ratio = ratio * ratio * ratio; score = ratio * commonCount; if (score >= 4.0) { fprintf(stdout, "%f %s:%d %c->%c in %d out of %d out of %d %s\n", ratio*commonCount, chromName, i+baseOff+1, chunk[i], commonVal, commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name); fprintf(out, "%f %s:%d %c->%c in %d out of %d out of %d %s\n", ratio*ratio*commonCount, chromName, i+baseOff+1, chunk[i], commonVal, commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name); } } } } freeCdnaAliList(&caList); slFreeList(&cdnaNamesList); freez(&dna); recycleNoiseTrack(noiseTrack, chunkSize); printf("%s %d maxNoise %d\n", chromName, baseOff, slCount(freeNoiseList)); } } return 0; }