void twoBitInfo(char *inName, char *outName) /* twoBitInfo - get information about sequences in a .2bit file. */ { struct twoBitFile *tbf; FILE *outFile; char *seqName = NULL; twoBitParseRange(inName, &inName, &seqName, NULL, NULL); tbf = twoBitOpen(inName); outFile = mustOpen(outName, "w"); if (seqName != NULL) { char *seqArray[1023]; int i; int seqCount = chopString(seqName, ",", seqArray, ArraySize(seqArray)); for (i = 0 ; i < seqCount ; i++) { if (optionExists("maskBed")) twoBitOutMaskBeds(tbf, seqArray[i], outFile); else if (optionExists("nBed")) twoBitOutNBeds(tbf, seqArray[i], outFile); else if(optionExists("noNs")) fprintf(outFile, "%s\t%d\n", seqArray[i], twoBitSeqSizeNoNs(tbf, seqArray[i])); else fprintf(outFile, "%s\t%d\n", seqArray[i], twoBitSeqSize(tbf, seqArray[i])); } } else { struct twoBitIndex *index; for (index = tbf->indexList; index != NULL; index = index->next) { if (optionExists("maskBed")) twoBitOutMaskBeds(tbf, index->name, outFile); else if (optionExists("nBed")) twoBitOutNBeds(tbf, index->name, outFile); else if(optionExists("noNs")) fprintf(outFile, "%s\t%d\n", index->name, twoBitSeqSizeNoNs(tbf, index->name)); else fprintf(outFile, "%s\t%d\n", index->name, twoBitSeqSize(tbf, index->name)); } } twoBitClose(&tbf); carefulClose(&outFile); }
void mafStats(char *twoBitFile, char *mafDir, char *outFile) /* mafStats - Calculate basic stats on maf file including species-by-species * coverage and percent ID. */ { struct twoBitFile *tbf = twoBitOpen(twoBitFile); FILE *f = mustOpen(outFile, "w"); struct twoBitIndex *ix; long genomeSize = 0; struct hash *speciesHash = hashNew(0); struct speciesAcc *speciesList = NULL, *species; for (ix = tbf->indexList; ix != NULL; ix = ix->next) { unsigned chromSize = twoBitSeqSizeNoNs(tbf, ix->name); genomeSize += chromSize; char mafFileName[PATH_LEN]; safef(mafFileName, sizeof(mafFileName), "%s/%s.maf", mafDir, ix->name); struct mafFile *mf = mafMayOpen(mafFileName); verbose(1, "processing %s\n", ix->name); if (mf == NULL) { warn("%s doesn't exist", mafFileName); continue; } struct mafAli *maf; while ((maf = mafNext(mf)) != NULL) { struct mafComp *mc; for (mc = maf->components; mc != NULL; mc = mc->next) { if (mc->text != NULL) toUpperN(mc->text, maf->textSize); } addCounts(maf, speciesHash, &speciesList); mafAliFree(&maf); } mafFileFree(&mf); } slReverse(&speciesList); for (species = speciesList; species != NULL; species = species->next) { fprintf(f, "counts: %s\t%ld\t%ld\t%ld\n", species->name, species->covCount, species->aliCount, species->idCount); fprintf(f, "precents: %s\t%4.2f%%\t%4.2f%%\t%4.2f%%\n", species->name, 100.0 * species->covCount/genomeSize, 100.0 * species->aliCount/genomeSize, 100.0 * species->idCount/species->aliCount); } carefulClose(&f); }