コード例 #1
0
void mafOrtholog(char *database, char *track, char *genePredFile, char *outFile)
/* mafOrtholog - find orthlogs in other species based on maf alignment and reference genePred */
{
struct slName *orgList = NULL;
FILE *f = mustOpen(outFile, "w");
struct genePredReader *gpr = genePredReaderFile(genePredFile, NULL);
struct genePred *gpList = genePredReaderAll(gpr), *gp = NULL;
struct sqlConnection *conn = hAllocConn();

if (optionExists("nibDir"))
    nibDir = optionVal("nibDir", NULL);
if (optionExists("orgs"))
    {
    char *orgFile = optionVal("orgs", NULL);
    char *buf;
    readInGulp(orgFile, &buf, NULL);
    orgList = stringToSlNames(buf);
    }

for (gp = gpList ; gp != NULL ; gp=gp->next)
    {
    struct mafAli *maf = NULL;
    if (thickOnly)
        maf = mafLoadInRegion(conn, track,
        gp->chrom, gp->cdsStart, gp->cdsEnd);
    else
        maf = mafLoadInRegion(conn, track,
        gp->chrom, gp->txStart, gp->txEnd);
    if (meFirst)
        moveMeToFirst(maf, database);
    printOrthologs(f, maf, gp);
    mafAliFree(&maf);
    }
carefulClose(&f);
}
コード例 #2
0
ファイル: mafToProtein.c プロジェクト: elmargb/kentUtils
/* get the maf alignments for a particular mafFrame */
struct mafAli *getAliForFrame(char *mafTable, struct mafFrames *frame)
{
struct sqlConnection *conn = hAllocConn();
struct mafAli *aliAll = mafLoadInRegion(conn, mafTable,
	frame->chrom, frame->chromStart, frame->chromEnd );
struct mafAli *ali;
struct mafAli *list = NULL;
struct mafAli *nextAli;

for(ali = aliAll; ali; ali = nextAli)
    {
    nextAli = ali->next;
    ali->next = NULL;

    char *masterSrc = ali->components->src;
    struct mafAli *subAli = NULL;

    if (mafNeedSubset(ali, masterSrc, frame->chromStart, frame->chromEnd))
	{
	subAli = mafSubset( ali, masterSrc, 
	    frame->chromStart, frame->chromEnd);
	if (subAli == NULL)
	    continue;
	}

    if (subAli)
	{
	slAddHead(&list, subAli);
	mafAliFree(&ali);
	}
    else
	slAddHead(&list, ali);
    }
slReverse(&list);

int size = 0;
for(ali = list; ali; ali = ali->next)
    {
    size += ali->components->size;
    }
assert(size == frame->chromEnd - frame->chromStart);

hFreeConn(&conn);

return list;
}
コード例 #3
0
ファイル: mafGene.c プロジェクト: elmargb/kentUtils
static struct mafAli *getAliForRange(char *database, char *mafTable, 
    char *chrom, int start, int end)
{
struct sqlConnection *conn = hAllocConn(database);
struct mafAli *aliAll = mafLoadInRegion(conn, mafTable,
	chrom, start, end);
struct mafAli *ali;
struct mafAli *list = NULL;
struct mafAli *nextAli;

hFreeConn(&conn);

for(ali = aliAll; ali; ali = nextAli)
    {
    nextAli = ali->next;
    ali->next = NULL;

    char *masterSrc = ali->components->src;
    struct mafAli *subAli = NULL;

    if (mafNeedSubset(ali, masterSrc, start, end))
	{
	subAli = mafSubset( ali, masterSrc, start, end);
	if (subAli == NULL)
	    continue;
	}

    if (subAli)
	{
	slAddHead(&list, subAli);
	mafAliFree(&ali);
	}
    else
	slAddHead(&list, ali);
    }
slReverse(&list);

list = padOutAli(list, database, chrom, start, end);

return list;
}
コード例 #4
0
struct mafAli *hgMafFrag(
	char *database,     /* Database, must already have hSetDb to this */
	char *track, 	    /* Name of MAF track */
	char *chrom, 	    /* Chromosome (in database genome) */
	int start, int end, /* start/end in chromosome */
	char strand, 	    /* Chromosome strand. */
	char *outName, 	    /* Optional name to use in first component */
	struct slName *orderList /* Optional order of organisms. */
	)
/* mafFrag- Extract maf sequences for a region from database.
 * This creates a somewhat unusual MAF that extends from start
 * to end whether or not there are actually alignments.  Where
 * there are no alignments (or alignments missing a species)
 * a . character fills in.   The score is always zero, and
 * the sources just indicate the species.  You can mafFree this
 * as normal. */
{
int chromSize = hChromSize(database, chrom);
struct sqlConnection *conn = hAllocConn(database);
struct dnaSeq *native = hChromSeq(database, chrom, start, end);
struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
char masterSrc[128];
struct hash *orgHash = newHash(10);
struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
int curPos = start, symCount = 0;
struct slName *name;
int order = 0;

/* Check that the mafs are really copacetic, the particular
 * subtype we think is in the database that this (relatively)
 * simple code can handle. */
safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
mafCheckFirstComponentSrc(mafList, masterSrc);
mafCheckFirstComponentStrand(mafList, '+');
slSort(&mafList, mafCmp);

/* Prebuild organisms if possible from input orderList. */
for (name = orderList; name != NULL; name = name->next)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, name->name, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    org->order = order++;
    if (nativeOrg == NULL)
        nativeOrg = org;
    }
if (orderList == NULL)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, database, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    if (nativeOrg == NULL)
        nativeOrg = org;
    }

/* Go through all mafs in window, mostly building up
 * org->dy strings. */
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    struct mafComp *mc, *mcMaster = maf->components;
    struct mafAli *subMaf = NULL;
    order = 0;
    if (curPos < mcMaster->start)
	{
	fillInMissing(nativeOrg, orgList, native, start,
		curPos, mcMaster->start);
	symCount += mcMaster->start - curPos;
	}
    if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst
    						    * backtracking */
	{
	if (mafNeedSubset(maf, masterSrc, curPos, end))
	    {
	    subMaf = mafSubset(maf, masterSrc, curPos, end);
	    if (subMaf == NULL)
	        continue;
	    }
	else
	    subMaf = maf;
	for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order)
	    {
	    /* Extract name up to dot into 'orgName' */
	    char buf[128], *e, *orgName;

	    if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */
		continue;

	    mc->leftStatus = mc->rightStatus = 0; /* squash annotation */

	    e = strchr(mc->src, '.');
	    if (e == NULL)
		orgName = mc->src;
	    else
		{
		int len = e - mc->src;
		if (len >= sizeof(buf))
		    errAbort("organism/database name %s too long", mc->src);
		memcpy(buf, mc->src, len);
		buf[len] = 0;
		orgName = buf;
		}

	    /* Look up dyString corresponding to  org, and create a
	     * new one if necessary. */
	    org = hashFindVal(orgHash, orgName);
	    if (org == NULL)
		{
		if (orderList != NULL)
		   errAbort("%s is not in orderList", orgName);
		AllocVar(org);
		slAddHead(&orgList, org);
		hashAddSaveName(orgHash, orgName, org, &org->name);
		org->dy = dyStringNew(native->size*1.5);
		dyStringAppendMultiC(org->dy, '.', symCount);
		if (nativeOrg == NULL)
		    nativeOrg = org;
		}
	    if (orderList == NULL && order > org->order)
		org->order = order;
	    org->hit = TRUE;

	    /* Fill it up with alignment. */
	    dyStringAppendN(org->dy, mc->text, subMaf->textSize);
	    }
	for (org = orgList; org != NULL; org = org->next)
	    {
	    if (!org->hit)
		dyStringAppendMultiC(org->dy, '.', subMaf->textSize);
	    org->hit = FALSE;
	    }
	symCount += subMaf->textSize;
	curPos = mcMaster->start + mcMaster->size;
	if (subMaf != maf)
	    mafAliFree(&subMaf);
	}
    }
if (curPos < end)
    {
    fillInMissing(nativeOrg, orgList, native, start, curPos, end);
    symCount += end - curPos;
    }
mafAliFreeList(&mafList);

slSort(&orgList, oneOrgCmp);
if (strand == '-')
    {
    for (org = orgList; org != NULL; org = org->next)
	reverseComplement(org->dy->string, org->dy->stringSize);
    }

/* Construct our maf */
AllocVar(maf);
maf->textSize = symCount;
for (org = orgList; org != NULL; org = org->next)
    {
    struct mafComp *mc;
    AllocVar(mc);
    if (org == orgList)
        {
	if (outName != NULL)
	    {
	    mc->src = cloneString(outName);
	    mc->srcSize = native->size;
	    mc->strand = '+';
	    mc->start = 0;
	    mc->size = native->size;
	    }
	else
	    {
	    mc->src = cloneString(masterSrc);
	    mc->srcSize = chromSize;
	    mc->strand = strand;
	    if (strand == '-')
	       reverseIntRange(&start, &end, chromSize);
	    mc->start = start;
	    mc->size = end-start;
	    }
	}
    else
        {
	int size = countAlpha(org->dy->string);
	mc->src = cloneString(org->name);
	mc->srcSize = size;
	mc->strand = '+';
	mc->start = 0;
	mc->size = size;
	}
    mc->text = cloneString(org->dy->string);
    dyStringFree(&org->dy);
    slAddHead(&maf->components, mc);
    }
slReverse(&maf->components);

slFreeList(&orgList);
freeHash(&orgHash);
hFreeConn(&conn);
return maf;
}