Пример #1
0
/*
 * remove_self: copy a MAF file to stdout, dropping blocks based on the
 * relative position of their first two components.
 *
 * Usage: remove_self maf-file
 *
 * A block is dropped when the first component starts after the second
 * component's start ('+' strand), or after srcSize - start - size of the
 * second component ('-' strand; presumably its forward-strand start).
 * Blocks with fewer than two components have nothing to compare against
 * and are written through unchanged.
 *
 * Returns 0 on success, 1 on a usage error.
 */
int main(int argc, char** argv) {
	struct mafFile* mf;
	struct mafAli* ali;

	if (argc != 2) {
		printf("remove_self maf-file\n");
		return 1;
	}

	init_scores70();

	mafWriteStart(stdout, 0);

	mf = mafOpen(argv[1], 0);
	while ((ali = mafNext(mf)) != NULL) {
		struct mafComp* first = ali->components;
		/* Guard against blocks that lack a second component. */
		struct mafComp* second = (first != NULL) ? first->next : NULL;
		int drop = 0;

		if (second != NULL) {
			if (second->strand == '+')
				drop = first->start > second->start;
			else if (second->strand == '-')
				drop = first->start >
					(second->srcSize - second->start - second->size);
		}

		if (!drop)
			mafWrite(stdout, ali);

		/* Each block returned by mafNext() is owned by us; release it so
		 * memory use does not grow with the size of the input file. */
		mafAliFree(&ali);
	}

	mafFileFree(&mf);

	mafWriteEnd(stdout);

	return 0;
}
void mafAddIRows(char *mafIn, char *twoBitIn,  char *mafOut, char *nBedFile)
/* mafAddIRows - read all alignments from mafIn, run the chainStrands /
 * bridgeSpecies / fillHoles passes over them, and write the result to mafOut.
 *   mafIn    - input maf file
 *   twoBitIn - two-bit file opened and handed to fillHoles()
 *   mafOut   - output maf file (created/truncated)
 *   nBedFile - optional file; the first word of every row is passed to
 *              addBed() together with bedHash.  May be NULL.
 * speciesHash, strandHeads and speciesList are not declared in this function;
 * they are expected to be defined elsewhere in the file. */
{
FILE *f = mustOpen(mafOut, "w");
struct twoBitFile *twoBit = twoBitOpen(twoBitIn);
struct mafAli *mafList, *maf;
struct mafFile *mf = mafOpen(mafIn);
struct hash *bedHash = newHash(6);

if (nBedFile != NULL)
    {
    struct lineFile *lf = lineFileOpen(nBedFile, TRUE);
    char *row[1];
    while (lineFileRow(lf, row))
        {
        addBed(row[0], bedHash);
        }
    lineFileClose(&lf);
    }

speciesHash = newHash(6);
mafList = readMafs(mf);
/* The header needs mf->scoring, so write it before the mafFile is freed. */
mafWriteStart(f, mf->scoring);
mafFileFree(&mf);

chainStrands(strandHeads, bedHash);
bridgeSpecies(mafList, speciesList);
fillHoles(mafList, speciesList, twoBit);

for (maf = mafList; maf != NULL; maf = maf->next)
    mafWrite(f, maf);

/* Finish and close the output explicitly instead of relying on process exit,
 * matching the other maf writers. */
mafWriteEnd(f);
carefulClose(&f);
}
Пример #3
0
static void processBed6(char *database, char *track, FILE *f, struct bed *bed,
                        struct slName *orgList)
/* Build the maf fragment for one bed6 record, write it to f and free it.
 * The fragment is named after the bed item unless refCoords is set.  When
 * txStarts is set a tx-upstream region definition is attached, and the
 * meFirst reordering is keyed on database rather than on the bed name. */
{
char *fragName = refCoords ? NULL : bed->name;
struct mafAli *frag = hgMafFrag(database, track,
                                bed->chrom, bed->chromStart, bed->chromEnd,
                                bed->strand[0], fragName, orgList);

if (txStarts)
    frag->regDef = mafRegDefNew(mafRegDefTxUpstream,
                                bed->chromEnd - bed->chromStart,
                                bed->name);

if (meFirst)
    moveMeToFirst(frag, txStarts ? database : bed->name);

mafWrite(f, frag);
mafAliFree(&frag);
}
Пример #4
0
/* Validate an alignment block, convert it to a mafAli, check the result and
 * append it to the MAF stream.  The temporary mafAli is freed before return. */
static void writeBlkToMaf(struct malnBlk *blk, FILE *mafFh) {
    struct mafAli *converted = NULL;

    malnBlk_validate(blk);

    converted = malnAliToMafAli(blk);
    checkMafAli(converted);
    mafWrite(mafFh, converted);

    mafAliFree(&converted);
}
void mafMeFirst(char *inMaf, char *meFile, char *outMaf)
/* mafMeFirst - Move component to top if it is one of the named ones.  Useful
 * in conjunction with mafFrags when you don't want the one with the gene name
 * to be in the middle.
 *   inMaf  - input maf file
 *   meFile - file whose words are hashed; compInHash() looks components up
 *            in that hash
 *   outMaf - output maf file (created/truncated)
 * Aborts via errAbort() if an alignment has no component found in the hash. */
{
struct hash *meHash = hashWordsInFile(meFile, 18);
struct mafFile *mf = mafOpen(inMaf);
FILE *f = mustOpen(outMaf, "w");
mafWriteStart(f, mf->scoring);
struct mafAli *maf;
while ((maf = mafNext(mf)) != NULL)
    {
    struct mafComp *comp = compInHash(maf, meHash);
    if (comp == NULL)
        errAbort("No components in %s in maf ending line %d of %s",
                meFile, mf->lf->lineIx, mf->lf->fileName);
    /* Unlink the matching component and re-insert it at the head. */
    slRemoveEl(&maf->components, comp);
    slAddHead(&maf->components, comp);
    mafWrite(f, maf);
    mafAliFree(&maf);
    }

mafWriteEnd(f);
carefulClose(&f);
/* Release the input maf file, which was previously left open. */
mafFileFree(&mf);
}
Пример #6
0
void faToMaf(char *inFa, char *outMaf)
/* faToMaf - Convert fa multiple alignment format to maf.
 *   inFa   - multi-sequence fa file read with readMultiFa()
 *   outMaf - output maf file (created/truncated); holds the single alignment
 *            built from all sequences by mafFromSeqList() */
{
struct dnaSeq *seqList = readMultiFa(inFa);
struct mafAli *maf = mafFromSeqList(seqList, inFa);
FILE *f = mustOpen(outMaf, "w");
mafWriteStart(f, NULL);
mafWrite(f, maf);
/* Complete the start/write/end sequence used by the other maf writers. */
mafWriteEnd(f);
carefulClose(&f);
}
Пример #7
0
void mafWriteAll(struct mafFile *mf, char *fileName)
/* Write the header (using mf->scoring) followed by every alignment on the
 * mf->alignments list to a newly opened fileName. */
{
FILE *out = mustOpen(fileName, "w");
struct mafAli *cur;

mafWriteStart(out, mf->scoring);

cur = mf->alignments;
while (cur != NULL)
    {
    mafWrite(out, cur);
    cur = cur->next;
    }

mafWriteEnd(out);
carefulClose(&out);
}
Пример #8
0
static void mafFrags(char *database, char *track, char *bedFile, char *mafFile)
/* mafFrags - Collect MAFs from regions specified in a bed file and write
 * them to mafFile.
 *   database - reference database; must appear in the "orgs" list if given
 *   track    - track name passed through to the maf-building helpers
 *   bedFile  - input bed file; rows are read as 12 columns when bed12 is
 *              set, otherwise as 6 columns
 *   mafFile  - output maf file (created/truncated)
 * Aborts via errAbort() when the "orgs" option is given and its file does
 * not contain database. */
{
struct slName *orgList = NULL;
struct lineFile *lf = lineFileOpen(bedFile, TRUE);
FILE *f = mustOpen(mafFile, "w");

if (optionExists("orgs"))
    {
    char *orgFile = optionVal("orgs", NULL);
    char *buf;
    readInGulp(orgFile, &buf, NULL);
    orgList = stringToSlNames(buf);

    /* Ensure that org list starts with database. */
    struct slName *me = slNameFind(orgList, database);
    if (me == NULL)
        errAbort("Need to have reference database '%s' in %s", database, orgFile);
    if (me != orgList)
        {
        slRemoveEl(&orgList, me);
        slAddHead(&orgList, me);
        }
    }
mafWriteStart(f, "zero");

if (bed12)
    {
    char *row[12];
    while (lineFileRow(lf, row))
        {
        struct bed *bed = bedLoadN(row, ArraySize(row));
        struct mafAli *maf = mafFromBed12(database, track, bed, orgList);
        if (meFirst)
            moveMeToFirst(maf, bed->name);
        mafWrite(f, maf);
        mafAliFree(&maf);
        bedFree(&bed);
        }
    }
else
    {
    char *row[6];
    while (lineFileRow(lf, row))
        {
        struct bed *bed = bedLoadN(row, ArraySize(row));
        processBed6(database, track, f, bed, orgList);
        bedFree(&bed);
        }
    }

/* The bed file has been fully consumed; close it (it was left open before). */
lineFileClose(&lf);
mafWriteEnd(f);
carefulClose(&f);
}
Пример #9
0
static void mafQueryOut(struct gfOutput *out, FILE *f)
/* Do axt oriented output - at end of processing query. */
{
    struct axtData *aod = out->data;
    struct axtBundle *gab;
    for (gab = aod->bundleList; gab != NULL; gab = gab->next)
    {
        struct axt *axt;
        for (axt = gab->axtList; axt != NULL; axt = axt->next)
        {
            struct mafAli temp;
            mafFromAxtTemp(axt, gab->tSize, gab->qSize, &temp);
            mafWrite(f, &temp);
        }
    }
    axtBundleFreeList(&aod->bundleList);
}
Пример #10
0
void extractMafs(char *file, FILE *f, struct hash *regionHash)
/* Extract MAFs in a file from regions specified in hash.
 *   file       - maf file opened with mafOpen()
 *   f          - output stream for extracted alignments; when outDir is set
 *                it is replaced locally by one file per region
 *   regionHash - looked up by chromosome name; the value is treated as a
 *                struct bed whose ->next is the following region.  The hash
 *                is modified: each region is removed once handled and its
 *                successor (if any) is added in its place.
 * The chromosome of an alignment is taken from its first component.
 * outDir, dir and keepInitialGaps are not declared in this function; they are
 * expected to be defined elsewhere in the file. */
{
char *chrom = NULL;
struct bed *bed = NULL;
struct mafFile *mf = mafOpen(file);
struct mafAli *maf = NULL;
struct mafComp *mc;
char path[256];

verbose(1, "extracting from %s\n", file);
maf = mafNext(mf);
/* Each pass handles one region (or skips one region-less chromosome);
 * maf always holds the next alignment that has not been dealt with yet. */
while (maf)
    {
    mc = maf->components;
    /* NOTE(review): the previous cloneString() result is overwritten here and
     * never released - looks like a small leak per chromosome; confirm. */
    if (!chrom || differentString(chrom, chromFromSrc(mc->src)))
        chrom = cloneString(chromFromSrc(mc->src));         /* new chrom */
    bed = (struct bed *)hashFindVal(regionHash, chrom);
    if (!bed)
        {
        /* no regions on this chrom -- skip to next chrom */
        do
            mafAliFree(&maf);
        while (((maf = mafNext(mf)) != NULL) && sameString(chromFromSrc(maf->components->src), chrom));
        continue;  // start over with this maf
        }
    verbose(2, "region: %s:%d-%d\n", 
            bed->chrom, bed->chromStart+1, bed->chromEnd);
    if (outDir)
        {
        /* One output file per region, named after the bed item.  f is a
         * by-value parameter, so the caller never sees these handles.
         * NOTE(review): the file opened for the last region is not passed to
         * endOutFile() inside this function - confirm who finishes it. */
        if (f)
            endOutFile(f);
        safef(path, sizeof (path), "%s/%s.maf", dir, bed->name);
        f = startOutFile(path);
        }

    /* skip mafs before region, stopping if chrom changes */
    while (maf && (mc = maf->components) && sameString(chrom, chromFromSrc(mc->src)) &&
        (mc->start + mc->size) <= bed->chromStart)
        {
        mafAliFree(&maf);
        maf = mafNext(mf);
        }

    /* extract all mafs and pieces of mafs in region */
    while (maf && (mc = maf->components) && sameString(chrom, chromFromSrc(mc->src)) &&
        (bed->chromStart < mc->start + mc->size && bed->chromEnd > mc->start))
        {
        int mafStart = mc->start;
        int mafEnd = mc->start + mc->size;
        /* full keeps the untrimmed alignment; maf may be swapped for a
         * trimmed copy below, in which case both must be freed. */
        struct mafAli *full = maf;
        if (mafStart < bed->chromStart || mafEnd > bed->chromEnd)
            {
            /* Alignment sticks out of the region: write only the overlap.
             * (The assignment to full repeats its initializer.) */
            full = maf;
            maf = mafSubsetE(full, mc->src, bed->chromStart, bed->chromEnd, keepInitialGaps);
            mc = maf->components;
            }
        verbose(2, "   %s:%d-%d\n", chrom, mc->start+1, mc->start + mc->size);
        mafWrite(f, maf);
        /* If the original extends past the region end, carry the remainder
         * (cut from full) forward as the next alignment so it is tested
         * against the following region; otherwise read a new alignment. */
        struct mafAli *nextMaf = (mafEnd > bed->chromEnd+1)
            ? mafSubset(full, mc->src, bed->chromEnd+1, mafEnd) : mafNext(mf);
        if (maf != full)
            mafAliFree(&maf);
        mafAliFree(&full);
        maf = nextMaf;
        }
    /* get next region: replace this chrom's hash entry with bed->next */
    hashRemove(regionHash, bed->chrom);
    if (bed->next)
        hashAdd(regionHash, bed->chrom, bed->next);
    }
mafFileFree(&mf);
}
Пример #11
0
void mafWriteGood(FILE *f, struct mafAli *maf)
/* Write maf to f, unless mafAllDash() reports that it is all dashes. */
{
if (mafAllDash(maf))
    return;
mafWrite(f, maf);
}
Пример #12
0
void xmfaToMaf(char *in, char *out)
/* xmfaToMaf - Convert from xmfa to maf format.
 *   in  - xmfa input file
 *   out - maf output file (created/truncated)
 * Each sequence header is parsed as ">org chrom:start-stop strand".  The
 * source size of every component is looked up in <db>.chromInfo, where <db>
 * is optionVal(org, org).  A line starting with '=' ends the current
 * alignment block and may carry "score=<value>".
 * Aborts via errAbort() on an unparsable header or a failed size lookup. */
{
int c;
FILE *input  = mustOpen(in,  "r");
FILE *output = mustOpen(out, "w");

char *commentLine;
struct dnaSeq *sequence;

struct mafAli *ali;

struct sqlConnection *conn = hAllocConn();

mafWriteStart(output, "mlagan");

AllocVar(ali);
while (myFaReadMixedNext(input, TRUE, "default name", TRUE, &commentLine, &sequence))
    {
    /* Peek at what follows the sequence: '>' starts another sequence of the
     * same block, '=' terminates the block. */
    c = fgetc(input);
    if (c == '=' || c == '>')
        {
        char org[32];
        char chrom[32];
        char srcName[128];
        char buffer[1024];
        int start;
        int stop;       /* parsed to validate the header; not used otherwise */
        char strand;
        struct mafComp *comp;

        ungetc(c, input);

        /* Parse the comment line.  Field widths keep org/chrom inside their
         * buffers, and the conversion count is checked so that no field is
         * used uninitialized. */
        if (sscanf(commentLine, ">%31s %31[^:]:%d-%d %c",
                   org, chrom, &start, &stop, &strand) != 5)
            errAbort("Can't parse xmfa header '%s' in %s", commentLine, in);

        AllocVar(comp);
        /* build the name */
        safef(srcName, sizeof(srcName), "%s.%s", optionVal(org, org), chrom);
        comp->src = cloneString(srcName);

        /* The query must run unconditionally: it used to live inside
         * assert(), which is compiled out when NDEBUG is defined. */
        sqlSafef(buffer, sizeof(buffer), "SELECT size FROM %s.chromInfo WHERE chrom = \"%s\"", optionVal(org, org), chrom);
        if (sqlQuickQuery(conn, buffer, buffer, sizeof(buffer)) == NULL)
            errAbort("Can't find size of %s in %s.chromInfo", chrom, optionVal(org, org));
        comp->srcSize = atoi(buffer);

        comp->strand = strand;

        /* header start is converted by subtracting one before it is stored */
        start = start - 1;

        comp->start = start;
        comp->size = ungappedSize(sequence);

        if (strand == '-')
            comp->start = comp->srcSize - (comp->start + comp->size);

        /* Hand the sequence text over to the component, then free the
         * now-empty dnaSeq shell. */
        comp->text = sequence->dna;
        sequence->dna = 0;
        slAddHead(&ali->components, comp);
        freeDnaSeq(&sequence);

        if (c == '=')
            {
            /* A separator without a parsable score yields 0 instead of an
             * indeterminate value. */
            double score = 0.0;
            if (fscanf(input, "= score=%lf\n", &score) != 1)
                score = 0.0;

            ali->score = score;

            /* Components were pushed on the head; restore file order. */
            slReverse(&ali->components);
            mafWrite(output, ali);
            mafAliFree(&ali);

            AllocVar(ali);
            }
        }
    else
        {
        /* Sequence not followed by '>' or '=' is dropped, as before; free it
         * instead of leaking it. */
        freeDnaSeq(&sequence);
        }
    }

/* Release the block that was allocated ahead of time but never written. */
mafAliFree(&ali);

mafWriteEnd(output);
hFreeConn(&conn);
carefulClose(&input);
carefulClose(&output);
}