Esempio n. 1
0
int main(int argc, char** argv) {
	struct mafFile* mf;
	struct mafAli* ali;
	struct mafComp* mc;
	
	if ( argc != 2) {
		printf("remove_self maf-file\n");
		return 1;
	}
	
	init_scores70();
	
	mafWriteStart(stdout, 0);
	
	mf = mafOpen(argv[1], 0);
	while((ali = mafNext(mf)) != NULL) {
		mc = ali->components;

		if(mc->next->strand == '+' && mc->start > mc->next->start)
			continue;
		else if(mc->next->strand == '-' && mc->start > (mc->next->srcSize - mc->next->start - mc->next->size))
			continue;
		else
			mafWrite(stdout, ali);
	}

	mafFileFree(&mf);
	
	mafWriteEnd(stdout);

	return 0;
}
void mafAddIRows(char *mafIn, char *twoBitIn,  char *mafOut, char *nBedFile)
/* mafAddIRows - Filter out maf files. */
{
FILE *f = mustOpen(mafOut, "w");
struct twoBitFile *twoBit = twoBitOpen(twoBitIn);
struct mafAli *mafList, *maf;
struct mafFile *mf = mafOpen(mafIn);
struct hash *bedHash = newHash(6); 

if (nBedFile != NULL)
    {
    struct lineFile *lf = lineFileOpen(nBedFile, TRUE);
    char *row[1];
    while (lineFileRow(lf, row))
	{
	addBed(row[0], bedHash);
	}
    lineFileClose(&lf);
    }

speciesHash = newHash(6);
mafList = readMafs(mf);
mafWriteStart(f, mf->scoring);
mafFileFree(&mf);

chainStrands(strandHeads, bedHash);
bridgeSpecies(mafList, speciesList);
fillHoles(mafList, speciesList, twoBit);

for(maf = mafList; maf ; maf = maf->next)
    mafWrite(f, maf);
}
Esempio n. 3
0
void mafsInRegion(char *regionFile, char *out, int mafCount, char *mafFiles[])
/* Extract MAFs in regions listed in regin file */
{
int i = 0;
struct hash *bedHash = NULL;
FILE *f = NULL;
struct mafFile *mf = NULL;

verbose(1, "Extracting from %d files to %s\n", mafCount, out);
bedHash = loadRegions(regionFile);

/* get scoring scheme */
mf = mafOpen(mafFiles[0]);
if (!mf)
    errAbort("can't open MAF file: %s\n", mafFiles[0]);
scoring = cloneString(mf->scoring);
mafFileFree(&mf);

/* set up output dir */
if (outDir)
    {
    dir = out;
    makeDir(dir);
    }
else
    f = startOutFile(out);
for (i = 0; i < mafCount; i++)
    extractMafs(mafFiles[i], f, bedHash);
if (!outDir)
    endOutFile(f);
}
struct mafAli *mafLoadInRegion2(struct sqlConnection *conn,
    struct sqlConnection *conn2, char *table, char *chrom,
    int start, int end, char *file)
/* Return list of alignments in region. */
{
char **row;
unsigned int extFileId = 0;
struct mafAli *maf, *mafList = NULL;
struct mafFile *mf = NULL;
int rowOffset;

if (file != NULL)
    mf = mafOpen(file);

struct sqlResult *sr = hRangeQuery(conn, table, chrom,
    start, end, NULL, &rowOffset);

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct scoredRef ref;
    scoredRefStaticLoad(row + rowOffset, &ref);
    if ((file != NULL) && (ref.extFile != 0))
	errAbort("expect extFile to be zero if file specified\n");
    if ((file == NULL) && (ref.extFile == 0))
	errAbort("expect extFile to be not zero or file specified\n");

    if (ref.extFile != extFileId)
	{
	char *path = hExtFileNameC(conn2, "extFile", ref.extFile);
	mafFileFree(&mf);
	mf = mafOpen(path);
	extFileId = ref.extFile;
	}
    lineFileSeek(mf->lf, ref.offset, SEEK_SET);
    maf = mafNext(mf);
    if (maf == NULL)
        internalErr();
    slAddHead(&mafList, maf);
    }
sqlFreeResult(&sr);
mafFileFree(&mf);
slReverse(&mafList);
/* hRangeQuery may return items out-of-order when bin is used in the query,
 * so sort here in order to avoid trouble at base-level view: */
slSort(&mafList, mafCmp);
return mafList;
}
Esempio n. 5
0
int main (int argc, char **argv)
{
	int nrow = DEFAULT_NROW,
	which_seq = 0,
	do_order = 0,
	misses_allowed = 0,
	output_mode = -1;
    float cutoff = DEFAULT_CUTOFF;
    char *strand = NULL,
	 *id = NULL;
    struct mafFile *file = NULL;
    struct mafAli *ali = NULL;
    struct mafComp *comp = NULL;
    struct MOTIF *motif = NULL;
    struct MATCH *matches = NULL;

    id = ckalloc (STRSIZE);
    get_args (argc, argv, &output_mode, &file, &id, &motif, &do_order, &nrow, &cutoff, &misses_allowed);
    strand = ckalloc (sizeof (char) * (nrow + 1));

    while (NULL != (ali = mafNext (file)))
    {
        for (comp = ali->components, which_seq = 0; comp; comp = comp->next, which_seq++)
            strand[which_seq] = comp->strand;
        strand[which_seq] = '\0';

	/* skip blocks that don't have all seqs in them */
 	if (which_seq != nrow)
	{
            mafAliFree (&ali);
	    continue;
	}

	/* forward strand */
	get_matches (&matches, FORWARD, ali, nrow, motif, do_order, misses_allowed);

	/* reverse strand */
        for (comp = ali->components; comp; comp = comp->next)
            do_revcomp((uchar *)comp->text, ali->textSize );

        get_matches (&matches, REVERSE, ali, nrow, motif, do_order, misses_allowed);

	/* output matches */
        if (matches)
            output_matches (matches, strand, id, nrow, motif);
        free_match_list (&matches);

        mafAliFree (&ali);
    }

    mafFileFree (&file);
    free (strand);
    free_motif_list (&motif);
    free (id);

    return 0;
}
Esempio n. 6
0
void reTraceFixMaf(char *mafFileName, char *qaFileName, char *newMafFileName)
/* reTraceFixMaf - Add quality line and recompute chrom line in maf. */
{
struct mafFile *mFile = mafReadAll(mafFileName);
struct qaSeq *qaList = qaRead(qaFileName);
struct hash *qaHash = makeQaHash(qaList);
struct mafAli *ali;
for (ali = mFile->alignments; ali != NULL; ali = ali->next)
    {
    if (!ali->components || !ali->components->next)
	errAbort("Something's up with the maf.");
    else
	{
	struct mafComp *secondSrc = ali->components->next;
	struct qaSeq *qas = hashMustFindVal(qaHash, secondSrc->src);
	int i, offset;
	int length = strlen(secondSrc->text);
	if (secondSrc->strand == '-')
	    reverseBytes(qas->qa, qas->size);
	offset = secondSrc->start;
	AllocArray(secondSrc->quality, length+1);
	for (i = 0; i < length; i++)
	    {
	    if (secondSrc->text[i] == '-')
		secondSrc->quality[i] = '-';
	    else
		{
		int q = (int)qas->qa[offset++];
		char c = 'F';
		if ((q >= 0) && (q < 45))
		    {
		    q = q / 5;
		    c = '0' + q;
		    }
		else if ((q >= 45) && (q < 98))
		    c = '9';
		else if (q == 99)
		    c = '0';
		else
		    c = 'F';
		secondSrc->quality[i] = c;
		}
	    }
	secondSrc->quality[length] = '\0';
	if (secondSrc->strand == '-')
	    reverseBytes(qas->qa, qas->size);
	}
    }
mafWriteAll(mFile, newMafFileName);
hashFree(&qaHash);
qaSeqFreeList(&qaList);
mafFileFree(&mFile);
}
Esempio n. 7
0
void mafFileFreeList(struct mafFile **pList)
/* Free up a list of maf files. */
{
struct mafFile *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    next = el->next;
    mafFileFree(&el);
    }
*pList = NULL;
}
Esempio n. 8
0
/* Construct a malnSet from a MAF file. defaultBranchLength is used to
 * assign branch lengths when inferring trees from the MAF. */
struct malnSet *malnSet_constructFromMaf(struct Genomes *genomes, char *mafFileName, int maxInputBlkWidth, double defaultBranchLength, struct Genome *treelessRootGenome) {
    struct malnSet *malnSet = malnSet_construct(genomes, mafFileName);
    struct mafFile *mafFile = mafOpen(mafFileName);
    struct mafAli *ali;
    while ((ali = mafNext(mafFile)) != NULL) {
        checkMafAli(ali);
        addMafAli(malnSet, ali, maxInputBlkWidth, defaultBranchLength, treelessRootGenome);
        mafAliFree(&ali);
    }
    malnSet_assert(malnSet);
    mafFileFree(&mafFile);
    return malnSet;
}
void mafStats(char *twoBitFile, char *mafDir, char *outFile)
/* mafStats - Calculate basic stats on maf file including species-by-species 
 * coverage and percent ID. */
{
struct twoBitFile *tbf = twoBitOpen(twoBitFile);
FILE *f = mustOpen(outFile, "w");
struct twoBitIndex *ix;
long genomeSize = 0;
struct hash *speciesHash = hashNew(0);
struct speciesAcc *speciesList = NULL, *species;
for (ix = tbf->indexList; ix != NULL; ix = ix->next)
    {
    unsigned chromSize = twoBitSeqSizeNoNs(tbf, ix->name);
    genomeSize += chromSize;
    char mafFileName[PATH_LEN];
    safef(mafFileName, sizeof(mafFileName), "%s/%s.maf", mafDir, ix->name);
    struct mafFile *mf = mafMayOpen(mafFileName);
    verbose(1, "processing %s\n", ix->name);
    if (mf == NULL)
        {
	warn("%s doesn't exist", mafFileName);
	continue;
	}
    struct mafAli *maf;
    while ((maf = mafNext(mf)) != NULL)
        {
	struct mafComp *mc;
	for (mc = maf->components; mc != NULL; mc = mc->next)
	    {
	    if (mc->text != NULL)
		toUpperN(mc->text, maf->textSize);
	    }
	addCounts(maf, speciesHash, &speciesList);
	mafAliFree(&maf);
	}
    mafFileFree(&mf);
    }
slReverse(&speciesList);

for (species = speciesList; species != NULL; species = species->next)
    {
    fprintf(f, "counts: %s\t%ld\t%ld\t%ld\n", species->name, species->covCount, species->aliCount, species->idCount);
    fprintf(f, "precents: %s\t%4.2f%%\t%4.2f%%\t%4.2f%%\n", 
    	species->name, 100.0 * species->covCount/genomeSize,
	100.0 * species->aliCount/genomeSize,
	100.0 * species->idCount/species->aliCount);
    }
carefulClose(&f);
}
void mafToPsl(char *querySrc, char *targetSrc, char *inName, char *outName)
/* mafToPsl - Convert maf to psl format. */
{
struct mafFile *mf = mafOpen(inName);
FILE *pslFh = mustOpen(outName, "w");
struct mafAli *maf;

while ((maf = mafNext(mf)) != NULL)
    {
    mafAliToPsl(querySrc, targetSrc, maf, pslFh);
    mafAliFree(&maf);
    }
carefulClose(&pslFh);
mafFileFree(&mf);
}
Esempio n. 11
0
void mafToFa(char *inName, char *outName)
/* mafToFa - convert maf file to fasta. */
{
struct mafFile *mf = mafOpen(inName);
FILE *faFh = mustOpen(outName, "w");
struct mafAli *maf;

while ((maf = mafNext(mf)) != NULL)
    {
    mafAliToFa(maf, faFh);
    mafAliFree(&maf);
    }
carefulClose(&faFh);
mafFileFree(&mf);
}
Esempio n. 12
0
void extractMafs(char *file, FILE *f, struct hash *regionHash)
/* extract MAFs in a file from regions specified in hash */
{
char *chrom = NULL;
struct bed *bed = NULL;
struct mafFile *mf = mafOpen(file);
struct mafAli *maf = NULL;
struct mafComp *mc;
char path[256];

verbose(1, "extracting from %s\n", file);
maf = mafNext(mf);
while (maf)
    {
    mc = maf->components;
    if (!chrom || differentString(chrom, chromFromSrc(mc->src)))
        chrom = cloneString(chromFromSrc(mc->src));         /* new chrom */
    bed = (struct bed *)hashFindVal(regionHash, chrom);
    if (!bed)
        {
        /* no regions on this chrom -- skip to next chrom */
        do
            mafAliFree(&maf);
        while (((maf = mafNext(mf)) != NULL) && sameString(chromFromSrc(maf->components->src), chrom));
        continue;  // start over with this maf
        }
    verbose(2, "region: %s:%d-%d\n", 
            bed->chrom, bed->chromStart+1, bed->chromEnd);
    if (outDir)
        {
        if (f)
            endOutFile(f);
        safef(path, sizeof (path), "%s/%s.maf", dir, bed->name);
        f = startOutFile(path);
        }

    /* skip mafs before region, stopping if chrom changes */
    while (maf && (mc = maf->components) && sameString(chrom, chromFromSrc(mc->src)) &&
        (mc->start + mc->size) <= bed->chromStart)
        {
        mafAliFree(&maf);
        maf = mafNext(mf);
        }

    /* extract all mafs and pieces of mafs in region */
    while (maf && (mc = maf->components) && sameString(chrom, chromFromSrc(mc->src)) &&
        (bed->chromStart < mc->start + mc->size && bed->chromEnd > mc->start))
        {
        int mafStart = mc->start;
        int mafEnd = mc->start + mc->size;
        struct mafAli *full = maf;
        if (mafStart < bed->chromStart || mafEnd > bed->chromEnd)
            {
            full = maf;
            maf = mafSubsetE(full, mc->src, bed->chromStart, bed->chromEnd, keepInitialGaps);
            mc = maf->components;
            }
        verbose(2, "   %s:%d-%d\n", chrom, mc->start+1, mc->start + mc->size);
        mafWrite(f, maf);
        struct mafAli *nextMaf = (mafEnd > bed->chromEnd+1)
            ? mafSubset(full, mc->src, bed->chromEnd+1, mafEnd) : mafNext(mf);
        if (maf != full)
            mafAliFree(&maf);
        mafAliFree(&full);
        maf = nextMaf;
        }
    /* get next region */
    hashRemove(regionHash, bed->chrom);
    if (bed->next)
        hashAdd(regionHash, bed->chrom, bed->next);
    }
mafFileFree(&mf);
}
void hgLoadMafSummary(char *db, char *table, char *fileName)
/* hgLoadMafSummary - Load a summary table of pairs in a maf into a database. */
{
long mafCount = 0, allMafCount = 0;
struct mafComp *mcMaster = NULL;
struct mafAli *maf;
struct mafFile *mf = mafOpen(fileName);
struct sqlConnection *conn;
FILE *f = hgCreateTabFile(".", table);
long componentCount = 0;
struct hash *componentHash = newHash(0);

if (!test)
    {
    conn = sqlConnect(database);
    mafSummaryTableCreate(conn, table, hGetMinIndexLength(db));
    }
verbose(1, "Indexing and tabulating %s\n", fileName);

/* process mafs */
while ((maf = mafNext(mf)) != NULL)
    {
    mcMaster = mafMaster(maf, mf, fileName);
    allMafCount++;
    if (mcMaster->srcSize < minSeqSize)
	continue;
    while (mcMaster->size > maxSize)
        {
        /* break maf into maxSize pieces */
        int end = mcMaster->start + maxSize;
        struct mafAli *subMaf = 
                mafSubset(maf, mcMaster->src, mcMaster->start, end);
        verbose(3, "Splitting maf %s:%d len %d\n", mcMaster->src,
                                        mcMaster->start, mcMaster->size);
        componentCount += 
            processMaf(subMaf, componentHash, f, mf, fileName);
        mafAliFree(&subMaf);
        subMaf = mafSubset(maf, mcMaster->src, 
                                end, end + (mcMaster->size - maxSize));
        mafAliFree(&maf);
        maf = subMaf;
        mcMaster = mafMaster(maf, mf, fileName);
        }
    if (mcMaster->size != 0)
        {
        /* remainder of maf after splitting off maxSize submafs */
        componentCount += 
            processMaf(maf, componentHash, f, mf, fileName);
        }
    mafAliFree(&maf);
    mafCount++;
    }
mafFileFree(&mf);
flushSummaryBlocks(componentHash, f);
verbose(1, 
    "Created %ld summary blocks from %ld components and %ld mafs from %s\n",
        summaryCount, componentCount, allMafCount, fileName);
if (test)
    return;
verbose(1, "Loading into %s table %s...\n", database, table);
hgLoadTabFile(conn, ".", table, &f);
verbose(1, "Loading complete");
hgEndUpdate(&conn, "Add %ld maf summary blocks from %s\n", 
                        summaryCount, fileName);
}