/* Process one counts line belonging to the (file, function) pair (fi, fn).

   newCountsStr is parsed by splitUpCountsLine into a line number and a
   Counts vector.  The vector must hold exactly cpf->n_events entries;
   otherwise a parse error is reported.  The counts are then folded into
   the inner (line number -> Counts) map that cpf->outerMap associates
   with (fi, fn), creating that inner map if the pair has not been seen
   before, and are also added to the running total in cpf->summary.

   Ownership: a freshly allocated FileFn key is either handed to
   cpf->outerMap (new pair) or deleted (pair already present).  The
   parsed Counts vector is deleted here only when addCountsToMap reports
   that it merged the values into an existing entry rather than storing
   the vector itself in the map.

   Error paths: a count mismatch goes to parseError; any allocation
   failure goes to mallocFail.  Nothing is released on those paths
   (presumably both helpers terminate the program -- TODO confirm). */
static void handle_counts ( SOURCE* s,
                            CacheProfFile* cpf,
                            char* fi, char* fn, char* newCountsStr )
{
   WordFM* countsMap;
   Bool    freeNewCounts;
   UWord   lnno;
   Counts* newCounts;
   FileFn* topKey;

   if (0) printf("%s %s %s\n", fi, fn, newCountsStr );

   // parse the numbers
   newCounts = splitUpCountsLine( s, &lnno, newCountsStr );

   // Did we get the right number?
   if (newCounts->n_counts != cpf->n_events)
      goto bad_counts;

   // allocate the key
   topKey = malloc(sizeof(FileFn));
   if (topKey) {
      topKey->fi_name = strdup(fi);
      topKey->fn_name = strdup(fn);
   }
   if (! (topKey && topKey->fi_name && topKey->fn_name))
      goto oom;

   // search for it
   if (lookupFM( cpf->outerMap, (Word*)(&countsMap), (Word)topKey )) {
      // found it.  Merge in new counts; the temporary key is not needed.
      freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
      ddel_FileFn(topKey);
   } else {
      // not found in the top map.  Create new entry
      countsMap = newFM( malloc, free, cmp_unboxed_UWord );
      if (!countsMap)
         goto oom;   // allocation failure, not a malformed counts line
      addToFM( cpf->outerMap, (Word)topKey, (Word)countsMap );
      freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
   }

   // also add to running summary total
   addCounts( s, cpf->summary, newCounts );

   // if safe to do so, free up the count vector
   if (freeNewCounts)
      ddel_Counts(newCounts);

   return;

  bad_counts:
   parseError(s, "# counts doesn't match # events");
   return;

  oom:
   mallocFail(s, "handle_counts:");
}
void mafStats(char *twoBitFile, char *mafDir, char *outFile) /* mafStats - Calculate basic stats on maf file including species-by-species * coverage and percent ID. */ { struct twoBitFile *tbf = twoBitOpen(twoBitFile); FILE *f = mustOpen(outFile, "w"); struct twoBitIndex *ix; long genomeSize = 0; struct hash *speciesHash = hashNew(0); struct speciesAcc *speciesList = NULL, *species; for (ix = tbf->indexList; ix != NULL; ix = ix->next) { unsigned chromSize = twoBitSeqSizeNoNs(tbf, ix->name); genomeSize += chromSize; char mafFileName[PATH_LEN]; safef(mafFileName, sizeof(mafFileName), "%s/%s.maf", mafDir, ix->name); struct mafFile *mf = mafMayOpen(mafFileName); verbose(1, "processing %s\n", ix->name); if (mf == NULL) { warn("%s doesn't exist", mafFileName); continue; } struct mafAli *maf; while ((maf = mafNext(mf)) != NULL) { struct mafComp *mc; for (mc = maf->components; mc != NULL; mc = mc->next) { if (mc->text != NULL) toUpperN(mc->text, maf->textSize); } addCounts(maf, speciesHash, &speciesList); mafAliFree(&maf); } mafFileFree(&mf); } slReverse(&speciesList); for (species = speciesList; species != NULL; species = species->next) { fprintf(f, "counts: %s\t%ld\t%ld\t%ld\n", species->name, species->covCount, species->aliCount, species->idCount); fprintf(f, "precents: %s\t%4.2f%%\t%4.2f%%\t%4.2f%%\n", species->name, 100.0 * species->covCount/genomeSize, 100.0 * species->aliCount/genomeSize, 100.0 * species->idCount/species->aliCount); } carefulClose(&f); }
/* Fold newCounts into counts_map under the key lnno.

   If lnno has no binding yet, newCounts itself is stored in the map and
   False is returned.  If lnno is already bound, newCounts is added into
   the existing Counts via addCounts and True is returned; in that case
   this function has not stored newCounts in the map.  handle_counts
   uses the result to decide whether it may delete newCounts. */
static Bool addCountsToMap ( SOURCE* s,
                             WordFM* counts_map,
                             UWord lnno,
                             Counts* newCounts )
{
   Counts* existing;

   if (!lookupFM( counts_map, (Word*)(&existing), (Word)lnno )) {
      // first time this line number is seen: bind lnno -> newCounts
      addToFM( counts_map, (Word)lnno, (Word)newCounts );
      return False;
   }

   // already bound: accumulate into the entry that is in the map
   addCounts( s, existing, newCounts );
   return True;
}
/* Merge the profile data held in src into dst.  Only dst's maps and
   summary are added to; src's are only read and iterated.

   Shape of the data:
      outer map:  FileFn*  ->  WordFM* (inner map)
      inner map:  UWord (line number)  ->  Counts*

   Algorithm:
      for each (filefn, innerMap) in src->outerMap
         if filefn is present in dst->outerMap
            for each (lineno, counts) in src's innerMap
               if lineno is present in dst's innerMap
                  add counts into dst's entry for lineno
               else
                  bind lineno -> copy(counts) in dst's innerMap
         else
            bind copy(filefn) -> copy(innerMap) in dst->outerMap
      finally add src->summary into dst->summary

   Before any of that, the two "events:" lines are compared with streq
   and barf is called if they differ.  A failed copy leads to
   mallocFail. */
static void merge_CacheProfInfo ( SOURCE* s,
                                  /*MOD*/CacheProfFile* dst,
                                  CacheProfFile* src )
{
   FileFn* srcKey;
   WordFM* srcInner;
   WordFM* dstInner;
   UWord   lineNo;
   Counts* srcCounts;
   Counts* dstCounts;

   /* Sanity check first: both files must describe the same events. */
   if (!streq( dst->events_line, src->events_line ))
      barf(s, "\"events:\" line of most recent file does "
              "not match those previously processed");

   initIterFM( src->outerMap );

   // walk every (filefn, inner map) pair of the source
   while (nextIterFM( src->outerMap, (Word*)&srcKey, (Word*)&srcInner )) {

      if (lookupFM( dst->outerMap, (Word*)&dstInner, (Word)srcKey )) {

         // dst already knows this filefn: merge line by line
         initIterFM( srcInner );

         while (nextIterFM( srcInner, (Word*)&lineNo, (Word*)&srcCounts )) {

            if (lookupFM( dstInner, (Word*)&dstCounts, lineNo )) {
               // line present on both sides: accumulate into dst's entry
               addCounts( s, dstCounts, srcCounts );
            } else {
               // line only in src: dst gets its own copy of the counts
               Counts* countsCopy = dopy_Counts( srcCounts );
               if (!countsCopy)
                  goto oom;
               addToFM( dstInner, lineNo, (Word)countsCopy );
            }

         }

      } else {

         // filefn unknown to dst: bind copies of key and inner map in dst
         FileFn* keyCopy   = dopy_FileFn(srcKey);
         WordFM* innerCopy = dopy_InnerMap(srcInner);
         if (!(keyCopy && innerCopy))
            goto oom;
         addToFM( dst->outerMap, (Word)keyCopy, (Word)innerCopy );

      }

   }

   // finally, fold the summary totals together
   addCounts( s, dst->summary, src->summary );
   return;

  oom:
   mallocFail(s, "merge_CacheProfInfo");
}