void makePai(boolean isEst, char *inName, char *outName)
/* Choose a reader from the suffix of inName, run it, then save the pairs.
 *   isEst   - passed through unchanged to the reader.
 *   inName  - input file; must end in ".ra" or ".info", anything else goes
 *             to usage().
 *   outName - file handed to savePairs().
 * The global cloneHash is replaced with a fresh hash before any reading. */
{
    cloneHash = newHash(20);

    if (endsWith(inName, ".ra")) {
        makeRaPai(isEst, inName, outName);
    } else {
        /* Not a .ra file: the only other accepted suffix is .info. */
        if (!endsWith(inName, ".info"))
            usage();
        else
            makeInfoPai(isEst, inName, outName);
    }

    printf("Saving %s\n", outName);
    savePairs(outName);
}
static void mapUniprot(struct spMapper *sm, char *qSpId, char *tSpId, float score)
/* Map a query/target pair of uniprot/swissprot ids directly, without going
 * through uniref.  When mapSpId() finds ids for both sides the pair is
 * saved and sm->qtMapCnt is bumped; otherwise each side that failed is
 * reported through handleNoMapping() and sm->qtNoSpIdMapCnt is bumped. */
{
    struct slName *queryIds = mapSpId(sm, qSpId);
    struct slName *targetIds = mapSpId(sm, tSpId);

    if (queryIds != NULL && targetIds != NULL) {
        savePairs(sm, qSpId, tSpId, queryIds, targetIds, score);
        sm->qtMapCnt++;
        return;
    }

    /* At least one side is unmapped; report query first, then target. */
    if (queryIds == NULL)
        handleNoMapping(sm, qSpId, noKGMapping);
    if (targetIds == NULL)
        handleNoMapping(sm, tSpId, noKGMapping);
    sm->qtNoSpIdMapCnt++;
}
static void mapUnirefEntries(struct spMapper *sm, struct uniref *qUniref, struct uniref *tUniref, char *qSpId, char *tSpId, float score) /* add all combinations of uniref entries */ { struct uniref *qUniprot, *tUniprot; boolean qKgFound = FALSE, tKgFound = FALSE; /* build list of kg pairs associate with these two uniref entries, avoiding * duplicated pairs. */ for (qUniprot = qUniref; qUniprot != NULL; qUniprot = qUniprot->next) { for (tUniprot = tUniref; tUniprot != NULL; tUniprot = tUniprot->next) { struct slName *qIds = mapSpId(sm, qUniprot->upAcc); struct slName *tIds = mapSpId(sm, tUniprot->upAcc); if (qIds != NULL) qKgFound = TRUE; if (tIds != NULL) tKgFound = TRUE; if ((qIds != NULL) && (tIds != NULL)) savePairs(sm, qSpId, tSpId, qIds, tIds, score); } } if (!(qKgFound && tKgFound)) { /* couldn't map to known genes */ if (!qKgFound) handleNoMapping(sm, qSpId, noKGMapping); if (!tKgFound) handleNoMapping(sm, tSpId, noKGMapping); sm->qtNoSpIdMapCnt++; } else { sm->qtMapCnt++; } }
void ppAnalyse(char *headersName, char *sizesName, char *pairsPsl, char *finDir, char *pairOut, char *mispairOut) /* ppAnalyse - Analyse paired plasmid reads. */ { struct hash *readHash = newHash(21); struct hash *pairHash = newHash(20); struct hash *fragHash = newHash(17); struct hash *finHash = NULL; struct hash *gsiHash = newHash(8); struct pairInfo *pairList = NULL, *measuredList, *pair; struct readInfo *readList = NULL, *rd; struct groupSizeInfo *gsiList = NULL, *gsi; int aliCount; int finCount; int i; struct slName *finList, *name; finHash = newHash(14); finList = listDir(finDir, "*.fa"); if ((finCount = slCount(finList)) == 0) errAbort("No fa files in %s\n", finDir); printf("Got %d (finished) .fa files in %s\n", finCount, finDir); for (name = finList; name != NULL; name = name->next) { chopSuffix(name->name); hashStore(finHash, name->name); } #ifdef SOMETIMES #endif /* SOMETIMES */ gsiList = readSizes(sizesName, gsiHash); printf("Got %d groups\n", slCount(gsiList)); readHeaders(headersName, readHash, pairHash, &readList, &pairList); slReverse(&readList); slReverse(&pairList); printf("Got %d reads in %d pairs\n", slCount(readList), slCount(pairList)); savePairs(pairOut, pairList, gsiHash); printf("Saved pairs to %s\n", pairOut); aliCount = readAlignments(pairsPsl, readHash, fragHash); printf("Got %d alignments in %s\n", aliCount, pairsPsl); doReadStats(readList, aliCount); doPairStats(pairList, finHash, gsiHash, mispairOut, &measuredList, &pairList); gsiMeanAndVariance(measuredList, gsiList, gsiHash, finHash); printf("Alignment length stats:\n"); for (i=0; i<10; ++i) { printf("%3d - %4d : %6d %4.2f%%\n", i*100, (i+1)*100, aliSizes[i], 100.0 * (double)aliSizes[i]/(double)aliCount); } doMeasuredStats(measuredList); for (gsi = gsiList; gsi != NULL; gsi = gsi->next) { if (gsi->measuredCount > 0) { printf("%s: mean %f, std %f, min %d, max %d, samples %d\n", gsi->name, gsi->measuredMean, sqrt(gsi->variance), gsi->measuredMin, gsi->measuredMax, gsi->measuredCount); 
printf(" %4.2f%% within guessed range (%d-%d)\n", 100.0 * (double)gsi->guessedCount/(double)gsi->measuredCount, gsi->guessedMin, gsi->guessedMax); printf(" w/in 200 %4.2f%%, w/in 400 %4.2f%%, w/in 800 %4.2f%%, w/in 1600 %4.3f%%, w/in 3200 %4.2f%%\n\n", gsiTight(gsi, 200), gsiTight(gsi, 400), gsiTight(gsi, 800), gsiTight(gsi, 1600), gsiTight(gsi, 3200) ); showHistogram(gsi); } } }