Example #1
0
void mapAltGraphXFile(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom,
		      char *netTable, char *altGraphXFileName, char *altGraphXTableName,
		      FILE *agxOut, FILE *selectedOut, int *foundCount, int *notFoundCount)
/* Map altGraphX structures from one organism to another; the per-record
 * work is delegated to mapAltGraphX().
 *
 * Records are loaded from altGraphXFileName when it is non-NULL, otherwise
 * from the table altGraphXTableName (rows whose tName is like chrom).  If
 * neither is given the program aborts.
 *
 * For every loaded record whose tName equals chrom:
 *   - if mapAltGraphX() returns a record, it is written to agxOut,
 *     *foundCount is incremented and, when selectedOut is non-NULL, the
 *     source record is written to selectedOut;
 *   - otherwise *notFoundCount is incremented.
 * Records on other chromosomes are skipped without touching either counter.
 *
 * orthoDb is not used by this function. */
{
struct altGraphX *agList = NULL, *ag = NULL;

if(altGraphXFileName != NULL)
    {
    warn("Loading altGraphX Records from file %s.", altGraphXFileName);
    agList = altGraphXLoadAll(altGraphXFileName);
    }
else if(altGraphXTableName != NULL)
    {
    char query[256];
    warn("Reading altGraphX Records from table %s.", altGraphXTableName);
    sqlSafef(query, sizeof(query), "select * from %s where tName like '%s'", altGraphXTableName, chrom);
    agList = altGraphXLoadByQuery(conn, query);
    }
else
    errAbort("orthoMap::mapAltGraphXFile() - Need a table name or file name to load altGraphX records");

warn("Mapping altGraphX records.");
for(ag = agList; ag != NULL; ag = ag->next)
    {
    struct altGraphX *agNew = NULL;

    /* A file may hold records for several chromosomes; only handle ours. */
    if(differentString(ag->tName, chrom))
        continue;
    occassionalDot();
    agNew = mapAltGraphX(ag, conn, db, netTable);
    if(agNew == NULL)
        {
        (*notFoundCount)++;
        continue;
        }
    (*foundCount)++;
    altGraphXTabOut(agNew, agxOut);
    altGraphXFree(&agNew);
    if(selectedOut != NULL)
        altGraphXTabOut(ag, selectedOut);
    }
/* NOTE(review): agList is not released here; presumably left for process
 * exit - confirm before reusing this function in a long-running context. */
}
Example #2
0
struct altGraphX *agFromAlignments(char *db, struct ggMrnaAli *maList, struct dnaSeq *seq, struct sqlConnection *conn,
				   int chromStart, int chromEnd, FILE *out )
/** Cluster overlapping alignments from maList into altGraphX structures.
 *
 * Builds an mRNA input from maList and seq, clusters it, and turns every
 * cluster into a gene graph and then an altGraphX record.  Each record is
 * renamed to "<tName>.<n>" (n is a counter that persists across calls),
 * shifted by chromStart, has its vertices position-sorted, and is added to
 * the file-level agxSeenBin keeper.  Records for which agxIsRedundant()
 * is false are then written to out.
 *
 * Returns the list of all records built (including those not written), in
 * reverse cluster order, or NULL when clustering produced nothing.
 * The file-level clusterCount is incremented when clusters exist.
 * conn and chromEnd are not used by this function. */
{
struct altGraphX *ag = NULL, *agList = NULL;
struct ggMrnaCluster *mcList = NULL, *mc = NULL;
struct ggMrnaInput *ci = NULL;
static int count = 0;   /* Suffix for generated record names; shared by all calls. */

ci = ggMrnaInputFromAlignments(maList, seq);
mcList = ggClusterMrna(ci);
if(mcList == NULL)
    {
    freeGgMrnaInput(&ci);
    return NULL;
    }

clusterCount++;
for(mc = mcList; mc != NULL; mc = mc->next)
    {
    struct geneGraph *gg = NULL;

    if(optionExists("consensus"))
        gg = ggGraphConsensusCluster(db, mc, ci, tissLibHash, !optionExists("skipTissues"));
    else
        gg = ggGraphCluster(db, mc, ci);
    assert(checkEvidenceMatrix(gg));
    ag = ggToAltGraphX(gg);
    /* Release the graph for this cluster right away.  Previously only the
     * graph of the last cluster was freed, leaking one per extra cluster.
     * NOTE(review): assumes each geneGraph owns its memory independently of
     * the other graphs and of the altGraphX built from it - the old code
     * already freed the last graph while returning its altGraphX. */
    freeGeneGraph(&gg);
    if(ag != NULL)
        {
        char name[256];
        freez(&ag->name);
        safef(name, sizeof(name), "%s.%d", ag->tName, count++);
        ag->name = cloneString(name);
        /* Convert back to genomic coordinates. */
        altGraphXoffset(ag, chromStart);
        /* Sort vertices so that they are in chromosomal order. */
        altGraphXVertPosSort(ag);
        /* Remember the record by position; presumably what agxIsRedundant()
         * consults below - confirm. */
        binKeeperAdd(agxSeenBin, ag->tStart, ag->tEnd, ag);
        slAddHead(&agList, ag);
        }
    }

/* Sometimes get nested, partial transcripts. Want to filter
   those out of the written output. */
for(ag = agList; ag != NULL; ag = ag->next)
    {
    if(!agxIsRedundant(ag))
        altGraphXTabOut(ag, out);
    }

/* Per the original note, the genomic sequence and maList are released
 * together with ci. */
ggFreeMrnaClusterList(&mcList);
freeGgMrnaInput(&ci);
return agList;
}
void txgToAgx(char *inTxg, char *outAgx)
/* txgToAgx - Convert from txg (txGraph) format to agx (altGraphX).
 * Reads inTxg row by row, converts each txGraph record with
 * txGraphToAltGraphX() and writes it to outAgx via altGraphXTabOut().
 * Both the input and the output file are closed before returning. */
{
struct lineFile *lf = lineFileOpen(inTxg, TRUE);
char *row[TXGRAPH_NUM_COLS];
FILE *f = mustOpen(outAgx, "w");

while (lineFileRow(lf, row))
    {
    struct txGraph *txg = txGraphLoad(row);
    verbose(2, "loaded txGraph %s\n", txg->name);
    struct altGraphX *agx = txGraphToAltGraphX(txg);
    altGraphXTabOut(agx, f);
    /* Each record is converted and released before the next row is read. */
    altGraphXFree(&agx);
    txGraphFree(&txg);
    }

carefulClose(&f);
/* The input handle was previously left open. */
lineFileClose(&lf);
}
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted, 
		       FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon.
 *   bedList    - beds written (12 columns) to bedOutFile when it is non-NULL.
 *   ag         - graph containing the exon; eIx is the index of its edge.
 *   outputted  - in/out flag: when it points at FALSE the graph is written
 *                to stdout and the flag is set to TRUE, so a graph with
 *                several cassette exons is printed once.  A NULL pointer
 *                disables this output.
 *   outfile    - when non-NULL, receives the exon sequence in FASTA form if
 *                the sequence is shorter than 200 bases.
 *   html, conf - passed through to writeBrowserLink(). */
{
if(bedOutFile != NULL)
    bedTabOutN(bedList, 12, bedOutFile);
writeBrowserLink(html, ag, conf, eIx);

/* Test the flag, not the pointer: the old `!outputted` check never fired
 * for a valid pointer and dereferenced NULL otherwise. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }

if(outfile != NULL)
    {
    int exonStart = ag->vPositions[ag->edgeStarts[eIx]];
    int exonEnd = ag->vPositions[ag->edgeEnds[eIx]];
    struct dnaSeq *seq = hChromSeq(ag->tName, exonStart, exonEnd);
    /* NOTE(review): reverse-complementing on the '+' strand looks inverted
     * (one would expect '-'); behaviour kept as-is - confirm intent. */
    if(sameString(ag->strand , "+"))
        reverseComplement(seq->dna, seq->size);
    if(seq->size < 200)
        faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}
Example #5
0
void reportCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out both an altGraphX and two bed files. For a cassette exon the
 edges are - 
 Name       Vertexes         Class
 ------     ----------       -----
 exon1:     startV->vs       constitutive (cons 0)
 junction1: vs->ve1          alternative1 (alt1 1)
 exon2:     ve1->altBpEnd    alternative1 (alt1 1)
 junction2: altBpEnd->ve2    alternative1 (alt1 1)
 exon3:     ve2->endV        constitutive (cons 0)
 junction3: vs->ve2          alternative2 (alt2 2)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int i =0;
struct dyString *dy = NULL;
if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 6;
agLoc->id = altCassette;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, vCLoc);
AllocArray(eEndsLoc, vCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, vCLoc);

/* Fill in the vertex positions. */
vPosLoc[0] = vPos[startV];
vPosLoc[1] = vPos[vs];
vPosLoc[2] = vPos[ve1];
vPosLoc[3] = vPos[altBpEnd];
vPosLoc[4] = vPos[ve2];
vPosLoc[5] = vPos[endV];

/* Fill in the vertex types. */
vTLoc[0] = vT[startV];
vTLoc[1] = vT[vs];
vTLoc[2] = vT[ve1];
vTLoc[3] = vT[altBpEnd];
vTLoc[4] = vT[ve2];
vTLoc[5] = vT[endV];

/* Fill in the edges. */
/* Constitutive first exon. */
eStartsLoc[0] = 0;
eEndsLoc[0] = 1;
eTLoc[0] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
/* Exon inclusion junction. */
eStartsLoc[1] = 1;
eEndsLoc[1] = 2;
eTLoc[1] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon exclusion junction. */
eStartsLoc[2] = 1;
eEndsLoc[2] = 4;
eTLoc[2] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Cassette exon. */
eStartsLoc[3] = 2;
eEndsLoc[3] = 3;
eTLoc[3] = 1;
ev = evidenceForEdge(ag, ve1, altBpEnd);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon inclusion junction. */
eStartsLoc[4] = 3;
eEndsLoc[4] = 4;
eTLoc[4] = 1;
ev = evidenceForEdge(ag, altBpEnd, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Constitutive second exon. */
eStartsLoc[5] = 4;
eEndsLoc[5] = 5;
eTLoc[5] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

slReverse(&agLoc->evidence);

dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}
Example #6
0
void reportAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out an altGraphX record for an alt3Prime splicing
event. Variable names are consistent with the rest of the program, but
can be misleading. Specifically vs = start of alt splicing, ve1 =
first end of alt splicing, etc. even though "vs" is really the end of
an exon. For an alt5Prime splice the edges are:

 Name       Vertexes         Class
 ------     ----------       -----
exon1:      startV->vs       constituative (0)
junction1:  vs->ve1          alternative (1)
junction2:  vs->ve2          alternative (2)
exon2:      ve1->e2        alternative (1)
exon3:      ve2->endV        constituative (0)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int edgeIx = 0, vertexIx = 0;
int i =0;
struct dyString *dy = NULL;

if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 5;
agLoc->id = alt3Prime;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, eCLoc);
AllocArray(eEndsLoc, eCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, eCLoc);

/* Fill in the vertex positions. */
vertexIx = 0;
vPosLoc[vertexIx++] = vPos[startV]; /* 0 */
vPosLoc[vertexIx++] = vPos[vs];     /* 1 */
vPosLoc[vertexIx++] = vPos[ve1];    /* 2 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 3 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 4 */
vPosLoc[vertexIx++] = vPos[endV];   /* 5 */

/* Fill in the vertex types. */
vertexIx = 0;
vTLoc[vertexIx++] = vT[startV];
vTLoc[vertexIx++] = vT[vs];
vTLoc[vertexIx++] = vT[ve1];
vTLoc[vertexIx++] = vT[vs]; /* Faking a separate exon for the alt spliced portion. */
vTLoc[vertexIx++] = vT[ve2];
vTLoc[vertexIx++] = vT[endV];

edgeIx = 0;

/* Constitutive first exon. */
eStartsLoc[edgeIx] = 0;
eEndsLoc[edgeIx] = 1;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alternative1 junction (shorter). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 2;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt2 junction (longer). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 4;
eTLoc[edgeIx] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt1 portion of second exon. */
eStartsLoc[edgeIx] = 2;
eEndsLoc[edgeIx] = 3;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, ve1, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Exon 2 constitutive (shorter exon) */
eStartsLoc[edgeIx] = 4;
eEndsLoc[edgeIx] = 5;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Package up the evidence, tissues, etc. */
slReverse(&agLoc->evidence);
dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}