void reTraceFixMaf(char *mafFileName, char *qaFileName, char *newMafFileName) /* reTraceFixMaf - Add quality line and recompute chrom line in maf. */ { struct mafFile *mFile = mafReadAll(mafFileName); struct qaSeq *qaList = qaRead(qaFileName); struct hash *qaHash = makeQaHash(qaList); struct mafAli *ali; for (ali = mFile->alignments; ali != NULL; ali = ali->next) { if (!ali->components || !ali->components->next) errAbort("Something's up with the maf."); else { struct mafComp *secondSrc = ali->components->next; struct qaSeq *qas = hashMustFindVal(qaHash, secondSrc->src); int i, offset; int length = strlen(secondSrc->text); if (secondSrc->strand == '-') reverseBytes(qas->qa, qas->size); offset = secondSrc->start; AllocArray(secondSrc->quality, length+1); for (i = 0; i < length; i++) { if (secondSrc->text[i] == '-') secondSrc->quality[i] = '-'; else { int q = (int)qas->qa[offset++]; char c = 'F'; if ((q >= 0) && (q < 45)) { q = q / 5; c = '0' + q; } else if ((q >= 45) && (q < 98)) c = '9'; else if (q == 99) c = '0'; else c = 'F'; secondSrc->quality[i] = c; } } secondSrc->quality[length] = '\0'; if (secondSrc->strand == '-') reverseBytes(qas->qa, qas->size); } } mafWriteAll(mFile, newMafFileName); hashFree(&qaHash); qaSeqFreeList(&qaList); mafFileFree(&mFile); }
void txCdsPredict(char *inFa, char *outCds, char *nmdBed, char *mafFile, boolean anyStart)
/* txCdsPredict - Somewhat simple-minded ORF predictor using a weighting scheme.
 *    inFa     - fasta file of RNA sequences to scan.
 *    outCds   - output file; receives the ORF evidence found on each RNA,
 *               sorted with cdsEvidenceCmpScore.
 *    nmdBed   - optional (may be NULL) bed file, hashed by bed name.
 *    mafFile  - optional (may be NULL) maf file, hashed by the src of the
 *               first component of each alignment.
 *    anyStart - passed through unchanged to orfsOnRna. */
{
struct dnaSeq *seqList = faReadAllDna(inFa);
verbose(2, "Read %d sequences from %s\n", slCount(seqList), inFa);

/* Bed records for NMD analysis, keyed by name.  Hash stays empty if no bed given. */
struct hash *nmdHash = hashNew(18);
if (nmdBed != NULL)
    {
    struct bed *bedList = bedLoadNAll(nmdBed, 12);
    struct bed *oneBed = bedList;
    while (oneBed != NULL)
        {
        hashAdd(nmdHash, oneBed->name, oneBed);
        oneBed = oneBed->next;
        }
    verbose(2, "Read %d beds from %s\n", nmdHash->elCount, nmdBed);
    }

/* Maf records for conservation analysis, keyed by first component's src.
 * Hash stays empty and the species count stays zero if no maf given. */
struct hash *mafHash = hashNew(18);
int otherSpeciesCount = 0;
if (mafFile != NULL)
    {
    struct mafFile *loaded = mafReadAll(mafFile);
    struct mafAli *aln = loaded->alignments;
    while (aln != NULL)
        {
        hashAdd(mafHash, aln->components->src, aln);
        aln = aln->next;
        }
    verbose(2, "Read %d alignments from %s\n", mafHash->elCount, mafFile);

    /* Count distinct src names over all components after the first. */
    struct hash *uniqSpeciesHash = hashNew(0);
    aln = loaded->alignments;
    while (aln != NULL)
        {
        struct mafComp *other = aln->components->next;
        while (other != NULL)
            {
            hashStore(uniqSpeciesHash, other->src);
            other = other->next;
            }
        aln = aln->next;
        }
    otherSpeciesCount = uniqSpeciesHash->elCount;
    verbose(2, "%d other species in %s\n", otherSpeciesCount, mafFile);
    }

/* Predict on each RNA in turn and write out whatever is found. */
FILE *out = mustOpen(outCds, "w");
struct dnaSeq *seq = seqList;
while (seq != NULL)
    {
    verbose(3, "%s\n", seq->name);
    struct cdsEvidence *orfList = orfsOnRna(seq, nmdHash, mafHash,
                                            otherSpeciesCount, anyStart);
    if (orfList != NULL)
        {
        slSort(&orfList, cdsEvidenceCmpScore);
        cdsEvidenceTabOut(orfList, out);
        }
    cdsEvidenceFreeList(&orfList);
    seq = seq->next;
    }
carefulClose(&out);
}
void mafToSnpBed(char *database, char *mafIn, char *gpIn, char *bedOut)
/* mafToSnpBed - finds SNPs in MAF and builds a bed with their functional consequence.
 *    database - passed through unchanged to parseOneGp.
 *    mafIn    - maf file; all of its alignments are handed to parseOneGp.
 *    gpIn     - genePred file; each record is processed separately.
 *    bedOut   - output file, created or truncated, then closed on return. */
{
struct mafFile *mafFile = mafReadAll(mafIn);
struct genePred *gpList = genePredLoadAll(gpIn);
FILE *f = mustOpen(bedOut, "w");
struct genePred *gp, *next;

for (gp = gpList; gp != NULL; gp = next)
    {
    /* Unlink this record from the list before handing it over, so that
     * parseOneGp is given a list of exactly one gene prediction.
     * The successor is saved first so iteration can continue. */
    next = gp->next;
    gp->next = NULL;
    parseOneGp(database, mafFile->alignments, gp, f);
    }

/* Close the output explicitly (as txCdsPredict does) so buffered data is
 * flushed here rather than left to process exit. */
carefulClose(&f);
}