Example #1
0
void mapAltGraphXFile(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom,
		      char *netTable, char *altGraphXFileName, char *altGraphXTableName,
		      FILE *agxOut, FILE *selectedOut, int *foundCount, int *notFoundCount)
/* Map over altGraphX Structures from one organism to
another. Basically create a mapping for the vertices and then reverse
them if on '-' strand.*/
{
int count =0;
struct bed *bed = NULL;
struct altGraphX *agList = NULL, *ag = NULL, *agNew = NULL;

if(altGraphXFileName != NULL)
    {
    warn("Loading altGraphX Records from file %s.", altGraphXFileName);
    agList = altGraphXLoadAll(altGraphXFileName);
    }
else if(altGraphXTableName != NULL)
    {
    char query[256];
    warn("Reading altGraphX Records from table %s.", altGraphXTableName);
    sqlSafef(query, sizeof(query), "select * from %s where tName like '%s'", altGraphXTableName, chrom);
    agList = altGraphXLoadByQuery(conn, query);
    }
else
    errAbort("orthoMap::mapAlGraphXFile() - Need a table name or file name to load altGraphX records");
warn("Mapping altGraphX records.");
for(ag = agList; ag != NULL; ag = ag->next)
    {
    if(differentString(ag->tName, chrom))
	continue;
    occassionalDot();
    agNew = mapAltGraphX(ag, conn, db, netTable);
    if(agNew == NULL)
	(*notFoundCount)++;
    else
	{
	(*foundCount)++;
	altGraphXTabOut(agNew, agxOut);
	altGraphXFree(&agNew);
        if (selectedOut != NULL)
            altGraphXTabOut(ag, selectedOut);
	}
    count++;
    }	
}
void txgToAgx(char *inTxg, char *outAgx)
/* txgToAgx - Convert from txg (txGraph) format to agx (altGraphX). */
{
struct lineFile *lf = lineFileOpen(inTxg, TRUE);
char *row[TXGRAPH_NUM_COLS];
FILE *f = mustOpen(outAgx, "w");

while (lineFileRow(lf, row))
    {
    struct txGraph *txg = txGraphLoad(row);
    verbose(2, "loaded txGraph %s\n", txg->name);
    struct altGraphX *agx = txGraphToAltGraphX(txg);
    altGraphXTabOut(agx, f);
    altGraphXFree(&agx);
    txGraphFree(&txg);
    }

carefulClose(&f);
}
Example #3
0
struct altGraphX *mapAltGraphX(struct altGraphX *ag, struct sqlConnection *conn,
			       char *db, char *netTable )
/* Map one altGraphX record. Return NULL if can't find. This function
 is getting a bit long but it isn't easy to do...*/
{
struct altGraphX *agNew = NULL;
struct chain *chain = NULL;
struct chain *workingChain = NULL, *workingChainFree = NULL;
struct chain *subChain = NULL, *toFree = NULL;
int i,j,k;
int edgeCountNew =0;
int vCountNew=0;
bool reverse = FALSE;
int *starts = NULL, *sizes = NULL;
int blockCount =0;

/* Find the best chain (one that overlaps the most exons. */
AllocArray(starts, ag->edgeCount);
AllocArray(sizes, ag->edgeCount);
for(i=0; i<ag->edgeCount; i++)
    {
    if(getSpliceEdgeType(ag, i) == ggExon)
	{
	starts[blockCount] = ag->vPositions[ag->edgeStarts[i]];
	sizes[blockCount] = ag->vPositions[ag->edgeEnds[i]] - ag->vPositions[ag->edgeStarts[i]];
	blockCount++;
	}
    }
chain = chainForBlocks(conn, db, netTable, ag->tName, ag->tStart, ag->tEnd,
		       starts, sizes, blockCount);
freez(&starts);
freez(&sizes);

if(chain == NULL)
    return NULL;
/* Make a smaller chain to work on... */
chainSubSetForRegion(chain, ag->tStart-1, ag->tEnd+1, &workingChain, &workingChainFree);
if(workingChain == NULL)
    return NULL;
if (chain->qStrand == '-')
    reverse = TRUE;
agNew = altGraphXClone(ag);
freez(&agNew->tName);
agNew->tName = cloneString(chain->qName);
/* Map vertex positions using chain. */
for(i = 0; i < agNew->vertexCount; i++)
    {
    struct cBlock *bi = NULL;
    int targetPos = agNew->vPositions[i];
    struct chain *subChain=NULL, *toFree=NULL;
    agNew->vPositions[i] = -1;
    chainSubSetForRegion(workingChain, targetPos , targetPos, &subChain, &toFree);    
    if(subChain != NULL)
	{
	int qs, qe;
	qChainRangePlusStrand(subChain, &qs, &qe);
	agNew->vPositions[i] = qs;
	}
    chainFree(&toFree);
    }
/* Prune out edges not found. */

/* Set up to remember how many edges we have and our start and stop. */
edgeCountNew = agNew->edgeCount;
vCountNew = agNew->vertexCount;
agNew->tStart = BIGNUM;
agNew->tEnd = 0;
for(i=0; i<agNew->vertexCount && i>= 0; i++)
    {
    struct evidence *ev = NULL;
    if(agNew->vPositions[i] == -1)
	{
	/* Adjust positions, overwriting one that isn't found. */
	vCountNew--;
	for(j=i; j<agNew->vertexCount-1; j++)
	    {
	    agNew->vPositions[j] = agNew->vPositions[j+1];
	    agNew->vTypes[j] = agNew->vTypes[j+1];
	    }
	/* Remove edges associated with this vertex. */
	for(j=0; j<agNew->edgeCount && j>=0; j++)
	    {
	    if(agNew->edgeStarts[j] == i || agNew->edgeEnds[j] == i)
		{
		edgeCountNew--;
		/* Remove evidence. */
		ev = slElementFromIx(agNew->evidence, j);
		slRemoveEl(&agNew->evidence, ev);
		for(k=j; k<agNew->edgeCount -1; k++)
		    {
		    agNew->edgeStarts[k] = agNew->edgeStarts[k+1];
		    agNew->edgeEnds[k] = agNew->edgeEnds[k+1];
		    agNew->edgeTypes[k] = agNew->edgeTypes[k+1];
		    }
		j--;
		agNew->edgeCount--;
		}
	    }
	/* Subtract off one vertex from all the others. */
	for(j=0; j<agNew->edgeCount; j++)
	    {
	    if(agNew->edgeStarts[j] > i)
		agNew->edgeStarts[j]--; 
	    if(agNew->edgeEnds[j] > i)
		agNew->edgeEnds[j]--; 
	    }
	i--;
	agNew->vertexCount--;
	}
    /* Else if vertex found set agNew start and ends. */
    else
	{
	agNew->tStart = min(agNew->vPositions[i], agNew->tStart);
	agNew->tEnd = max(agNew->vPositions[i], agNew->tEnd);
	}
    }
/* Not going to worry about mRNAs that aren't used anymore. Leave them in
   for now. */
agNew->vertexCount = vCountNew;
agNew->edgeCount = edgeCountNew;
if(agNew->vertexCount == 0 || agNew->edgeCount == 0)
    {
    altGraphXFree(&agNew);
    return NULL;
    }
for(i=0; i<agNew->edgeCount; i++)
    {
    if(agNew->edgeStarts[i] >= agNew->vertexCount ||
       agNew->edgeEnds[i] >= agNew->vertexCount)
	{
	warn("For %s vertexes occur at %d when in reality there are only %d vertices.",
	     agNew->name, max(agNew->edgeStarts[i], agNew->edgeEnds[i]), agNew->vertexCount);
	}
    }
/* If it is on the other strand reverse it. */
if(reverse)
    {
    altGraphXReverse(agNew);
    }
chainFree(&workingChainFree);
return agNew;
}
Example #4
0
void reportCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out both an altGraphX and two bed files. For a cassette exon the
 edges are - 
 Name       Vertexes         Class
 ------     ----------       -----
 exon1:     startV->vs       constitutive (cons 0)
 junction1: vs->ve1          alternative1 (alt1 1)
 exon2:     ve1->altBpEnd    alternative1 (alt1 1)
 junction2: altBpEnd->ve2    alternative1 (alt1 1)
 exon3:     ve2->endV        constitutive (cons 0)
 junction3: vs->ve2          alternative2 (alt2 2)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int i =0;
struct dyString *dy = NULL;
if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 6;
agLoc->id = altCassette;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, vCLoc);
AllocArray(eEndsLoc, vCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, vCLoc);

/* Fill in the vertex positions. */
vPosLoc[0] = vPos[startV];
vPosLoc[1] = vPos[vs];
vPosLoc[2] = vPos[ve1];
vPosLoc[3] = vPos[altBpEnd];
vPosLoc[4] = vPos[ve2];
vPosLoc[5] = vPos[endV];

/* Fill in the vertex types. */
vTLoc[0] = vT[startV];
vTLoc[1] = vT[vs];
vTLoc[2] = vT[ve1];
vTLoc[3] = vT[altBpEnd];
vTLoc[4] = vT[ve2];
vTLoc[5] = vT[endV];

/* Fill in the edges. */
/* Constitutive first exon. */
eStartsLoc[0] = 0;
eEndsLoc[0] = 1;
eTLoc[0] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
/* Exon inclusion junction. */
eStartsLoc[1] = 1;
eEndsLoc[1] = 2;
eTLoc[1] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon exclusion junction. */
eStartsLoc[2] = 1;
eEndsLoc[2] = 4;
eTLoc[2] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Cassette exon. */
eStartsLoc[3] = 2;
eEndsLoc[3] = 3;
eTLoc[3] = 1;
ev = evidenceForEdge(ag, ve1, altBpEnd);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon inclusion junction. */
eStartsLoc[4] = 3;
eEndsLoc[4] = 4;
eTLoc[4] = 1;
ev = evidenceForEdge(ag, altBpEnd, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Constitutive second exon. */
eStartsLoc[5] = 4;
eEndsLoc[5] = 5;
eTLoc[5] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

slReverse(&agLoc->evidence);

dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}
Example #5
0
void reportAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out an altGraphX record for an alt3Prime splicing
event. Variable names are consistent with the rest of the program, but
can be misleading. Specifically vs = start of alt splicing, ve1 =
first end of alt splicing, etc. even though "vs" is really the end of
an exon. For an alt5Prime splice the edges are:

 Name       Vertexes         Class
 ------     ----------       -----
exon1:      startV->vs       constituative (0)
junction1:  vs->ve1          alternative (1)
junction2:  vs->ve2          alternative (2)
exon2:      ve1->e2        alternative (1)
exon3:      ve2->endV        constituative (0)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int edgeIx = 0, vertexIx = 0;
int i =0;
struct dyString *dy = NULL;

if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 5;
agLoc->id = alt3Prime;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, eCLoc);
AllocArray(eEndsLoc, eCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, eCLoc);

/* Fill in the vertex positions. */
vertexIx = 0;
vPosLoc[vertexIx++] = vPos[startV]; /* 0 */
vPosLoc[vertexIx++] = vPos[vs];     /* 1 */
vPosLoc[vertexIx++] = vPos[ve1];    /* 2 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 3 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 4 */
vPosLoc[vertexIx++] = vPos[endV];   /* 5 */

/* Fill in the vertex types. */
vertexIx = 0;
vTLoc[vertexIx++] = vT[startV];
vTLoc[vertexIx++] = vT[vs];
vTLoc[vertexIx++] = vT[ve1];
vTLoc[vertexIx++] = vT[vs]; /* Faking a separate exon for the alt spliced portion. */
vTLoc[vertexIx++] = vT[ve2];
vTLoc[vertexIx++] = vT[endV];

edgeIx = 0;

/* Constitutive first exon. */
eStartsLoc[edgeIx] = 0;
eEndsLoc[edgeIx] = 1;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alternative1 junction (shorter). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 2;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt2 junction (longer). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 4;
eTLoc[edgeIx] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt1 portion of second exon. */
eStartsLoc[edgeIx] = 2;
eEndsLoc[edgeIx] = 3;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, ve1, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Exon 2 constitutive (shorter exon) */
eStartsLoc[edgeIx] = 4;
eEndsLoc[edgeIx] = 5;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Package up the evidence, tissues, etc. */
slReverse(&agLoc->evidence);
dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}