static char *getEstDir(char *def, char *com)
/* Work out the EST read direction from the definition line, falling back
 * to the comment line.  A line is only trusted when exactly one of the
 * 3'/5' tests fires on it.  Returns "3'" or "5'" (string constants, not
 * to be freed), or NULL when neither line gives an unambiguous answer. */
{
    char *threePrime = "3'";
    char *fivePrime = "5'";
    boolean sawThree, sawFive;

    /* The definition line takes priority. */
    sawThree = isThreePrime(def);
    sawFive = isFivePrime(def);
    if (sawThree ^ sawFive)
        {
        if (sawThree)
            return threePrime;
        return fivePrime;
        }

    /* Definition had neither marker or both: consult the comment line. */
    sawThree = isThreePrime(com);
    sawFive = isFivePrime(com);
    if (sawThree ^ sawFive)
        {
        if (sawThree)
            return threePrime;
        return fivePrime;
        }

    /* Neither line was conclusive (no marker at all, or both markers). */
    return NULL;
}
boolean findIgRegion(struct entity *ent, int *retStart, int *retEnd, int *retCount) /* Find intergenic region compute mean and variance of 3' ends, then * return mean after discarding outlyers. Returns false if data looks funky. */ { int totalCount = 0; int totalPos = 0; int insideCount = 0; int insidePos = 0; double mean; double insideMean; double dif; double varience = 0; double std; struct cdaRef *ref; struct cdaAli *ali; int end; boolean revStrand = (ent->strand == '-'); /* Calculate mean. */ for (ref = ent->cdaRefList; ref != NULL; ref = ref->next) { ali = ref->ali; if (isThreePrime(ali)) { ++totalCount; if (revStrand) totalPos += ali->chromStart-1; else totalPos += ali->chromEnd; } } if (totalCount <= 0) return FALSE; mean = (double)totalPos/totalCount; /* Calculate square root of varience to estimate standard deviation. */ for (ref = ent->cdaRefList; ref != NULL; ref = ref->next) { ali = ref->ali; if (isThreePrime(ali)) { if (revStrand) dif = ali->chromStart-1; else dif = ali->chromEnd; dif -= mean; varience += dif*dif; } } varience /= totalCount; std = paranoidSqrt(varience); /* If varience too large, or curve not very bell shaped return FALSE. * Figure out insideMean (mean of stuff within one standard deviation of outer mean.) */ if (std > 200) { return FALSE; } totalPos = 0; for (ref = ent->cdaRefList; ref != NULL; ref = ref->next) { ali = ref->ali; if (isThreePrime(ali)) { ++totalCount; if (revStrand) end = ali->chromStart-1; else end = ali->chromEnd; dif = end - mean; if (fabs(dif) <= 200) { ++insideCount; insidePos += end; } } } if (insideCount*2 < totalCount) { return FALSE; } insideMean = (double)insidePos/insideCount; if (revStrand) { end = round(insideMean - std*0.5); } else { end = round(insideMean + std*0.5); } *retStart = end-3; *retEnd = end+3; *retCount = totalCount; return TRUE; }