/*
 * remove_self maf-file
 *
 * Copies alignments from the MAF file named on the command line to stdout,
 * dropping an alignment when the first component's start is greater than the
 * second component's start.  For a second component on the '-' strand its
 * start is first converted as srcSize - start - size before comparing.
 *
 * Alignments with fewer than two components cannot be compared that way and
 * are written through unchanged (previously they caused a NULL dereference).
 *
 * Returns 1 after printing the usage line when the argument count is wrong,
 * 0 otherwise.
 *
 * NOTE(review): alignments returned by mafNext() are not released in this
 * loop; confirm whether the library expects the caller to free them.
 */
int main(int argc, char **argv)
{
    struct mafFile *mf;
    struct mafAli *ali;

    if (argc != 2) {
        printf("remove_self maf-file\n");
        return 1;
    }

    init_scores70();
    mafWriteStart(stdout, 0);
    mf = mafOpen(argv[1], 0);

    while ((ali = mafNext(mf)) != NULL) {
        struct mafComp *first = ali->components;
        struct mafComp *second = (first != NULL) ? first->next : NULL;

        /* Only a pair of components can be tested for the drop condition. */
        if (second != NULL) {
            if (second->strand == '+' && first->start > second->start)
                continue;
            if (second->strand == '-' &&
                first->start > (second->srcSize - second->start - second->size))
                continue;
        }
        mafWrite(stdout, ali);
    }

    mafFileFree(&mf);
    mafWriteEnd(stdout);
    return 0;
}
void mafAddIRows(char *mafIn, char *twoBitIn, char *mafOut, char *nBedFile) /* mafAddIRows - Filter out maf files. */ { FILE *f = mustOpen(mafOut, "w"); struct twoBitFile *twoBit = twoBitOpen(twoBitIn); struct mafAli *mafList, *maf; struct mafFile *mf = mafOpen(mafIn); struct hash *bedHash = newHash(6); if (nBedFile != NULL) { struct lineFile *lf = lineFileOpen(nBedFile, TRUE); char *row[1]; while (lineFileRow(lf, row)) { addBed(row[0], bedHash); } lineFileClose(&lf); } speciesHash = newHash(6); mafList = readMafs(mf); mafWriteStart(f, mf->scoring); mafFileFree(&mf); chainStrands(strandHeads, bedHash); bridgeSpecies(mafList, speciesList); fillHoles(mafList, speciesList, twoBit); for(maf = mafList; maf ; maf = maf->next) mafWrite(f, maf); }
static void processBed6(char *database, char *track, FILE *f, struct bed *bed, struct slName *orgList) /* generate MAF alignment for a bed6 */ { struct mafAli *maf; char *useName = refCoords ? NULL : bed->name; if (txStarts) { maf = hgMafFrag(database, track, bed->chrom, bed->chromStart, bed->chromEnd, bed->strand[0], useName, orgList); maf->regDef = mafRegDefNew(mafRegDefTxUpstream, bed->chromEnd-bed->chromStart, bed->name); if (meFirst) moveMeToFirst(maf, database); } else { maf = hgMafFrag(database, track, bed->chrom, bed->chromStart, bed->chromEnd, bed->strand[0], useName, orgList); if (meFirst) moveMeToFirst(maf, bed->name); } mafWrite(f, maf); mafAliFree(&maf); }
/* Validate blk, convert it to a mafAli, check the converted alignment,
 * write it to mafFh, and free the temporary mafAli. */
static void writeBlkToMaf(struct malnBlk *blk, FILE *mafFh)
{
    malnBlk_validate(blk);

    struct mafAli *converted = malnAliToMafAli(blk);

    checkMafAli(converted);
    mafWrite(mafFh, converted);
    mafAliFree(&converted);
}
void mafMeFirst(char *inMaf, char *meFile, char *outMaf) /* mafMeFirst - Move component to top if it is one of the named ones. Useful * in conjunction with mafFrags when you don't want the one with the gene name * to be in the middle.. */ { struct hash *meHash = hashWordsInFile(meFile, 18); struct mafFile *mf = mafOpen(inMaf); FILE *f = mustOpen(outMaf, "w"); mafWriteStart(f, mf->scoring); struct mafAli *maf; while ((maf = mafNext(mf)) != NULL) { struct mafComp *comp = compInHash(maf, meHash); if (comp == NULL) errAbort("No components in %s in maf ending line %d of %s", meFile, mf->lf->lineIx, mf->lf->fileName); slRemoveEl(&maf->components, comp); slAddHead(&maf->components, comp); mafWrite(f, maf); mafAliFree(&maf); } mafWriteEnd(f); carefulClose(&f); }
void faToMaf(char *inFa, char *outMaf)
/* faToMaf - Read the multi-fasta alignment in inFa, turn the sequence list
 * into a single maf alignment with mafFromSeqList, and write it to outMaf
 * after a header with a NULL scoring scheme.
 * NOTE(review): mafWriteEnd is not called before the file is closed, and the
 * sequence list and alignment are not freed here - confirm both are
 * intentional. */
{
struct dnaSeq *seqs = readMultiFa(inFa);
struct mafAli *ali = mafFromSeqList(seqs, inFa);
FILE *out = mustOpen(outMaf, "w");

mafWriteStart(out, NULL);
mafWrite(out, ali);

carefulClose(&out);
}
void mafWriteAll(struct mafFile *mf, char *fileName)
/* Write the header (using mf->scoring), every alignment on mf->alignments,
 * and the footer to a newly opened file called fileName. */
{
FILE *out = mustOpen(fileName, "w");
struct mafAli *cur = mf->alignments;

mafWriteStart(out, mf->scoring);
while (cur != NULL)
    {
    mafWrite(out, cur);
    cur = cur->next;
    }
mafWriteEnd(out);
carefulClose(&out);
}
static void mafFrags(char *database, char *track, char *bedFile, char *mafFile) /* mafFrags - Collect MAFs from regions specified in a 6 column bed file. */ { struct slName *orgList = NULL; struct lineFile *lf = lineFileOpen(bedFile, TRUE); FILE *f = mustOpen(mafFile, "w"); if (optionExists("orgs")) { char *orgFile = optionVal("orgs", NULL); char *buf; readInGulp(orgFile, &buf, NULL); orgList = stringToSlNames(buf); /* Ensure that org list starts with database. */ struct slName *me = slNameFind(orgList, database); if (me == NULL) errAbort("Need to have reference database '%s' in %s", database, orgFile); if (me != orgList) { slRemoveEl(&orgList, me); slAddHead(&orgList, me); } } mafWriteStart(f, "zero"); if (bed12) { char *row[12]; while (lineFileRow(lf, row)) { struct bed *bed = bedLoadN(row, ArraySize(row)); struct mafAli *maf = mafFromBed12(database, track, bed, orgList); if (meFirst) moveMeToFirst(maf, bed->name); mafWrite(f, maf); mafAliFree(&maf); bedFree(&bed); } } else { char *row[6]; while (lineFileRow(lf, row)) { struct bed *bed = bedLoadN(row, ArraySize(row)); processBed6(database, track, f, bed, orgList); bedFree(&bed); } } mafWriteEnd(f); carefulClose(&f); }
static void mafQueryOut(struct gfOutput *out, FILE *f) /* Do axt oriented output - at end of processing query. */ { struct axtData *aod = out->data; struct axtBundle *gab; for (gab = aod->bundleList; gab != NULL; gab = gab->next) { struct axt *axt; for (axt = gab->axtList; axt != NULL; axt = axt->next) { struct mafAli temp; mafFromAxtTemp(axt, gab->tSize, gab->qSize, &temp); mafWrite(f, &temp); } } axtBundleFreeList(&aod->bundleList); }
void extractMafs(char *file, FILE *f, struct hash *regionHash)
/* extract MAFs in a file from regions specified in hash
 *   file       - maf file to read
 *   f          - output stream; replaced per region when outDir is set
 *   regionHash - looked up by chromosome name; the value is used as a
 *                struct bed whose ->next is the following region.  The hash
 *                is modified: each processed region is removed and, if it
 *                has a successor, the successor is added under the same key.
 * The chromosome of an alignment is taken from its first component's src via
 * chromFromSrc.  Alignments are consumed in file order; the loops below
 * compare each alignment only against the current region.
 * NOTE(review): looks like this expects the maf and each bed list to be
 * sorted by position - confirm against the caller.
 * NOTE(review): in outDir mode the last file opened with startOutFile is not
 * passed to endOutFile inside this function. */
{
char *chrom = NULL;
struct bed *bed = NULL;
struct mafFile *mf = mafOpen(file);
struct mafAli *maf = NULL;
struct mafComp *mc;
char path[256];

verbose(1, "extracting from %s\n", file);
maf = mafNext(mf);
while (maf)
    {
    mc = maf->components;
    /* NOTE(review): the previous chrom copy is not freed when it is replaced. */
    if (!chrom || differentString(chrom, chromFromSrc(mc->src)))
        chrom = cloneString(chromFromSrc(mc->src)); /* new chrom */
    bed = (struct bed *)hashFindVal(regionHash, chrom);
    if (!bed)
        {
        /* no regions on this chrom -- skip to next chrom */
        do
            mafAliFree(&maf);
        while (((maf = mafNext(mf)) != NULL) && sameString(chromFromSrc(maf->components->src), chrom));
        continue; // start over with this maf
        }
    verbose(2, "region: %s:%d-%d\n", bed->chrom, bed->chromStart+1, bed->chromEnd);
    if (outDir)
        {
        /* one output file per region, named after the bed record */
        if (f)
            endOutFile(f);
        safef(path, sizeof (path), "%s/%s.maf", dir, bed->name);
        f = startOutFile(path);
        }
    /* skip mafs before region, stopping if chrom changes */
    while (maf && (mc = maf->components) && sameString(chrom, chromFromSrc(mc->src)) && (mc->start + mc->size) <= bed->chromStart)
        {
        mafAliFree(&maf);
        maf = mafNext(mf);
        }
    /* extract all mafs and pieces of mafs in region */
    while (maf && (mc = maf->components) && sameString(chrom, chromFromSrc(mc->src)) && (bed->chromStart < mc->start + mc->size && bed->chromEnd > mc->start))
        {
        int mafStart = mc->start;
        int mafEnd = mc->start + mc->size;
        /* full keeps the alignment as read; maf may be swapped for a subset below */
        struct mafAli *full = maf;
        if (mafStart < bed->chromStart || mafEnd > bed->chromEnd)
            {
            /* alignment sticks out of the region on at least one side: trim it */
            full = maf;
            maf = mafSubsetE(full, mc->src, bed->chromStart, bed->chromEnd, keepInitialGaps);
            mc = maf->components;
            }
        verbose(2, " %s:%d-%d\n", chrom, mc->start+1, mc->start + mc->size);
        mafWrite(f, maf);
        /* If the untrimmed alignment reaches beyond chromEnd+1, the next
         * alignment to examine is its remainder from chromEnd+1 to mafEnd;
         * otherwise read the next alignment from the file. */
        struct mafAli *nextMaf = (mafEnd > bed->chromEnd+1) ? mafSubset(full, mc->src, bed->chromEnd+1, mafEnd) : mafNext(mf);
        /* free the trimmed copy (if one was made) and then the original */
        if (maf != full)
            mafAliFree(&maf);
        mafAliFree(&full);
        maf = nextMaf;
        }
    /* get next region */
    hashRemove(regionHash, bed->chrom);
    if (bed->next)
        hashAdd(regionHash, bed->chrom, bed->next);
    }
mafFileFree(&mf);
}
void mafWriteGood(FILE *f, struct mafAli *maf)
/* Write maf to f unless mafAllDash reports it as all dashes. */
{
if (mafAllDash(maf))
    return;
mafWrite(f, maf);
}
void xmfaToMaf(char *in, char *out) /* xmfaToMaf - Convert from xmfa to maf format. */ { int c; FILE *input = mustOpen(in, "r"); FILE *output = mustOpen(out, "w"); char* commentLine; struct dnaSeq* sequence; struct mafAli *ali; struct sqlConnection* conn = hAllocConn(); mafWriteStart(output, "mlagan"); AllocVar(ali); while(myFaReadMixedNext(input, TRUE, "default name", TRUE, &commentLine, &sequence)) { char srcName[128]; c = fgetc(input); if(c == '=' || c == '>') { /* add the current sequence and process the block if we've see an '='*/ char org[32]; char chrom[32]; int start; int stop; char strand; struct mafComp *comp; double score; char buffer[1024]; ungetc(c, input); AllocVar(comp); /* parse the comment line */ sscanf(commentLine, ">%s %[^:]:%d-%d %c", org, chrom, &start, &stop, &strand); /* build the name */ safef(srcName, sizeof(srcName), "%s.%s", optionVal(org, org), chrom); comp->src = cloneString(srcName); sqlSafef(buffer, 1024, "SELECT size FROM %s.chromInfo WHERE chrom = \"%s\"", optionVal(org, org), chrom); assert(sqlQuickQuery(conn, buffer, buffer, 1024) != 0); comp->srcSize = atoi(buffer); comp->strand = strand; start = start - 1; comp->start = start; comp->size = ungappedSize(sequence); if(strand == '-') comp->start = comp->srcSize - (comp->start + comp->size); comp->text = sequence->dna; sequence->dna = 0; slAddHead(&ali->components, comp); freeDnaSeq(&sequence); if(c == '=') { fscanf(input, "= score=%lf\n", &score); ali->score = score; slReverse(&ali->components); mafWrite(output, ali); mafAliFree(&ali); AllocVar(ali); } } } mafWriteEnd(output); }