Пример #1
0
/* Parse one counts line belonging to (file fi, function fn) and fold it
   into the profile being accumulated in cpf.

     s             input source, passed through to the error reporters
     cpf           profile being built; its outerMap maps FileFn* keys to
                   inner maps (line number -> Counts*), and its summary is
                   updated with every counts line handled here
     fi, fn        file and function names; they are strdup'd into the key
     newCountsStr  text of the counts line, parsed by splitUpCountsLine

   A line whose number of counts differs from cpf->n_events is reported
   with parseError.  Allocation failures are reported with mallocFail. */
static
void handle_counts ( SOURCE* s,
                     CacheProfFile* cpf,
                     char* fi, char* fn, char* newCountsStr )
{
    WordFM* countsMap;
    Bool    freeNewCounts;
    UWord   lnno;
    Counts* newCounts;
    FileFn* topKey;

    if (0)  printf("%s %s %s\n", fi, fn, newCountsStr );

    // parse the numbers
    newCounts = splitUpCountsLine( s, &lnno, newCountsStr );

    // Did we get the right number?
    if (newCounts->n_counts != cpf->n_events) {
        parseError(s, "# counts doesn't match # events");
        return;
    }

    // allocate the key
    topKey = malloc(sizeof(FileFn));
    if (topKey) {
        topKey->fi_name = strdup(fi);
        topKey->fn_name = strdup(fn);
    }
    // NOTE: the code below dereferences topKey unconditionally, so this
    // relies on mallocFail not returning.
    if (! (topKey && topKey->fi_name && topKey->fn_name))
        mallocFail(s, "handle_counts:");

    // search for it
    if (lookupFM( cpf->outerMap, (Word*)(&countsMap), (Word)topKey )) {
        // found it.  Merge in new counts.  The map keeps its own key, so
        // the one built for the lookup is released again.
        freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
        ddel_FileFn(topKey);
    } else {
        // not found in the top map.  Create new entry
        countsMap = newFM( malloc, free, cmp_unboxed_UWord );
        if (!countsMap) {
            // This is an allocation failure, not malformed input, so it
            // must not be reported as a counts/events mismatch.
            mallocFail(s, "handle_counts:");
            return;   // defensive: nothing below is valid without a map
        }
        addToFM( cpf->outerMap, (Word)topKey, (Word)countsMap );
        freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
    }

    // also add to running summary total
    addCounts( s, cpf->summary, newCounts );

    // addCountsToMap returns True when newCounts was merged into an
    // existing entry rather than stored in the map; only then may the
    // vector be freed here.
    if (freeNewCounts)
        ddel_Counts(newCounts);
}
void mafStats(char *twoBitFile, char *mafDir, char *outFile)
/* mafStats - Calculate basic stats on maf file including species-by-species 
 * coverage and percent ID.
 *
 * For every sequence in the twoBitFile index, the non-N size is added to the
 * genome size and the alignments in mafDir/<sequence name>.maf are tallied
 * per species.  A warning is issued and the sequence is skipped when that
 * maf file cannot be opened.  Two lines per species are written to outFile:
 * the raw counts, then covCount and aliCount as a percentage of the genome
 * size and idCount as a percentage of aliCount. */
{
struct twoBitFile *tbf = twoBitOpen(twoBitFile);
FILE *out = mustOpen(outFile, "w");
long genomeSize = 0;
struct hash *speciesHash = hashNew(0);
struct speciesAcc *speciesList = NULL;
struct speciesAcc *sp;
struct twoBitIndex *seqIx;

for (seqIx = tbf->indexList; seqIx != NULL; seqIx = seqIx->next)
    {
    char mafPath[PATH_LEN];
    struct mafFile *mf;
    unsigned basesNoN = twoBitSeqSizeNoNs(tbf, seqIx->name);

    /* The sequence counts towards the genome size even without a maf file. */
    genomeSize += basesNoN;
    safef(mafPath, sizeof(mafPath), "%s/%s.maf", mafDir, seqIx->name);
    mf = mafMayOpen(mafPath);
    verbose(1, "processing %s\n", seqIx->name);
    if (mf == NULL)
        {
        warn("%s doesn't exist", mafPath);
        }
    else
        {
        struct mafAli *ali;
        for (ali = mafNext(mf); ali != NULL; ali = mafNext(mf))
            {
            struct mafComp *comp = ali->components;
            /* Upper-case every component's text before counting. */
            while (comp != NULL)
                {
                if (comp->text != NULL)
                    toUpperN(comp->text, ali->textSize);
                comp = comp->next;
                }
            addCounts(ali, speciesHash, &speciesList);
            mafAliFree(&ali);
            }
        mafFileFree(&mf);
        }
    }
slReverse(&speciesList);

sp = speciesList;
while (sp != NULL)
    {
    double covPct = 100.0 * sp->covCount/genomeSize;
    double aliPct = 100.0 * sp->aliCount/genomeSize;
    double idPct = 100.0 * sp->idCount/sp->aliCount;

    fprintf(out, "counts: %s\t%ld\t%ld\t%ld\n", sp->name, sp->covCount, sp->aliCount, sp->idCount);
    fprintf(out, "precents: %s\t%4.2f%%\t%4.2f%%\t%4.2f%%\n", 
        sp->name, covPct, aliPct, idPct);
    sp = sp->next;
    }
carefulClose(&out);
}
Пример #3
0
/* Record newCounts against line number lnno in counts_map.

   Returns True when lnno already had an entry: newCounts has been added
   into that entry with addCounts and the map holds no reference to it.
   Returns False when lnno was not present: the newCounts pointer itself
   is stored in the map as the value for lnno. */
static Bool addCountsToMap ( SOURCE* s,
                             WordFM* counts_map,
                             UWord lnno, Counts* newCounts )
{
    Counts* existing;

    if (!lookupFM( counts_map, (Word*)(&existing), (Word)lnno )) {
        // first sighting of this line number: bind lnno -> newCounts
        addToFM( counts_map, (Word)lnno, (Word)newCounts );
        return False;
    }

    // line number already bound: accumulate into the existing vector
    addCounts( s, existing, newCounts );
    return True;
}
Пример #4
0
/* Merge the profile in src into dst; only dst is modified.

   Outer maps:  FileFn* -> WordFM* (inner map)
   Inner maps:  UWord (line number) -> Counts*

   For each (filefn, innerMap) in src:
     - if filefn is absent from dst, copies of the key and of the whole
       inner map are added to dst;
     - otherwise, for each (lineno, counts) in the src inner map:
         - if lineno is absent from the dst inner map, a copy of counts
           is added to it;
         - otherwise counts is added into the existing dst entry.
   Finally src's summary is added into dst's summary.

   barf is called if the two "events:" lines differ, and mallocFail is
   called if any copy cannot be made. */
static void merge_CacheProfInfo ( SOURCE* s,
                                  /*MOD*/CacheProfFile* dst,
                                  CacheProfFile* src )
{
    FileFn* srcKey;
    WordFM* srcInner;
    WordFM* dstInner;
    UWord   lineNo;
    Counts* srcCounts;
    Counts* dstCounts;

    /* The two files must describe the same events. */
    if (!streq( dst->events_line, src->events_line ))
        barf(s, "\"events:\" line of most recent file does "
             "not match those previously processed");

    initIterFM( src->outerMap );

    while (nextIterFM( src->outerMap, (Word*)&srcKey, (Word*)&srcInner )) {

        if (! lookupFM( dst->outerMap, (Word*)&dstInner, (Word)srcKey )) {
            // filefn is new to dst: insert copies of key and inner map
            FileFn* keyCopy   = dopy_FileFn(srcKey);
            WordFM* innerCopy = dopy_InnerMap(srcInner);
            if (keyCopy == NULL || innerCopy == NULL) {
                mallocFail(s, "merge_CacheProfInfo");
                return;
            }
            addToFM( dst->outerMap, (Word)keyCopy, (Word)innerCopy );
            continue;
        }

        // filefn is in both: merge the src inner map into the dst one
        initIterFM( srcInner );

        while (nextIterFM( srcInner, (Word*)&lineNo, (Word*)&srcCounts )) {

            if (lookupFM( dstInner, (Word*)&dstCounts, lineNo )) {
                // line present in dst: accumulate
                addCounts( s, dstCounts, srcCounts );
            } else {
                // line absent from dst: insert a copy of the counts
                Counts* countsCopy = dopy_Counts( srcCounts );
                if (countsCopy == NULL) {
                    mallocFail(s, "merge_CacheProfInfo");
                    return;
                }
                addToFM( dstInner, lineNo, (Word)countsCopy );
            }
        }
    }

    // the summaries are merged as well
    addCounts(s, dst->summary, src->summary );
}