void mapAltGraphXFile(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, char *netTable, char *altGraphXFileName, char *altGraphXTableName, FILE *agxOut, FILE *selectedOut, int *foundCount, int *notFoundCount) /* Map over altGraphX Structures from one organism to another. Basically create a mapping for the vertices and then reverse them if on '-' strand.*/ { int count =0; struct bed *bed = NULL; struct altGraphX *agList = NULL, *ag = NULL, *agNew = NULL; if(altGraphXFileName != NULL) { warn("Loading altGraphX Records from file %s.", altGraphXFileName); agList = altGraphXLoadAll(altGraphXFileName); } else if(altGraphXTableName != NULL) { char query[256]; warn("Reading altGraphX Records from table %s.", altGraphXTableName); sqlSafef(query, sizeof(query), "select * from %s where tName like '%s'", altGraphXTableName, chrom); agList = altGraphXLoadByQuery(conn, query); } else errAbort("orthoMap::mapAlGraphXFile() - Need a table name or file name to load altGraphX records"); warn("Mapping altGraphX records."); for(ag = agList; ag != NULL; ag = ag->next) { if(differentString(ag->tName, chrom)) continue; occassionalDot(); agNew = mapAltGraphX(ag, conn, db, netTable); if(agNew == NULL) (*notFoundCount)++; else { (*foundCount)++; altGraphXTabOut(agNew, agxOut); altGraphXFree(&agNew); if (selectedOut != NULL) altGraphXTabOut(ag, selectedOut); } count++; } }
struct altGraphX *agFromAlignments(char *db, struct ggMrnaAli *maList, struct dnaSeq *seq, struct sqlConnection *conn, int chromStart, int chromEnd, FILE *out ) /** Custer overlaps from maList into altGraphX structure. */ { struct altGraphX *ag = NULL, *agList = NULL; struct ggMrnaCluster *mcList=NULL, *mc=NULL; struct ggMrnaInput *ci = NULL; struct geneGraph *gg = NULL; static int count = 0; ci = ggMrnaInputFromAlignments(maList, seq); mcList = ggClusterMrna(ci); if(mcList == NULL) { freeGgMrnaInput(&ci); return NULL; } clusterCount++; for(mc = mcList; mc != NULL; mc = mc->next) { if(optionExists("consensus")) { gg = ggGraphConsensusCluster(db, mc, ci, tissLibHash, !optionExists("skipTissues")); } else gg = ggGraphCluster(db, mc,ci); assert(checkEvidenceMatrix(gg)); ag = ggToAltGraphX(gg); if(ag != NULL) { char name[256]; freez(&ag->name); safef(name, sizeof(name), "%s.%d", ag->tName, count++); ag->name = cloneString(name); /* Convert back to genomic coordinates. */ altGraphXoffset(ag, chromStart); /* Sort vertices so that they are chromosomal order */ altGraphXVertPosSort(ag); /* write to file */ binKeeperAdd(agxSeenBin, ag->tStart, ag->tEnd, ag); slAddHead(&agList, ag); } } /* Sometimes get nested, partial transcripts. Want to filter those out. */ for(ag = agList; ag != NULL; ag = ag->next) { if(!agxIsRedundant(ag)) altGraphXTabOut(ag, out); } /* genoSeq and maList are freed with ci and gg */ ggFreeMrnaClusterList(&mcList); freeGgMrnaInput(&ci); freeGeneGraph(&gg); return agList; }
void txgToAgx(char *inTxg, char *outAgx)
/* txgToAgx - Convert from txg (txGraph) format to agx (altGraphX).
 *
 * Reads inTxg one row at a time, loads each row as a txGraph, converts it
 * with txGraphToAltGraphX() and writes the result tab-separated to outAgx.
 * Both per-row structures are freed before the next row is read. */
{
struct lineFile *lf = lineFileOpen(inTxg, TRUE);
char *row[TXGRAPH_NUM_COLS];
FILE *f = mustOpen(outAgx, "w");

while (lineFileRow(lf, row))
    {
    struct txGraph *txg = txGraphLoad(row);
    verbose(2, "loaded txGraph %s\n", txg->name);
    struct altGraphX *agx = txGraphToAltGraphX(txg);
    altGraphXTabOut(agx, f);
    altGraphXFree(&agx);
    txGraphFree(&txg);
    }
carefulClose(&f);
/* Close the input as well so the reader and its file handle are released. */
lineFileClose(&lf);
}
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted,
                       FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon.
 *
 *   bedList    - bed record handed to bedTabOutN() when bedOutFile is set.
 *   ag         - graph containing the exon.
 *   eIx        - index of the exon's edge in ag->edgeStarts / ag->edgeEnds.
 *   outputted  - in/out flag: when it points at FALSE, ag is written to
 *                stdout and the flag is set to TRUE so the graph is emitted
 *                only once. A NULL pointer disables this output.
 *   bedOutFile - optional destination for the bed record (may be NULL).
 *   outfile    - optional fasta destination (may be NULL). The exon sequence
 *                is written only when it is shorter than 200 bases.
 *   html, conf - passed through to writeBrowserLink(). */
{
if(bedOutFile != NULL)
    {
    /* NOTE(review): only the pointer at the head of bedList is passed on;
     * if bedList can hold several records, confirm whether all of them
     * should be written. */
    bedTabOutN(bedList,12, bedOutFile);
    }
writeBrowserLink(html, ag, conf, eIx);

/* Test the flag itself, not the pointer: checking the pointer would never
 * emit the graph for a valid flag and would dereference NULL otherwise. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }

if(outfile != NULL)
    {
    struct dnaSeq *seq = hChromSeq(ag->tName, ag->vPositions[ag->edgeStarts[eIx]], ag->vPositions[ag->edgeEnds[eIx]]);
    /* NOTE(review): the sequence is reverse-complemented for "+" strand
     * graphs; confirm this matches the strand convention of the callers. */
    if(sameString(ag->strand , "+"))
        reverseComplement(seq->dna, seq->size);
    if(seq->size < 200)
        faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}
void reportCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int altBpStart, int altBpEnd, int startV, int endV, FILE *out) /* Write out both an altGraphX and two bed files. For a cassette exon the edges are - Name Vertexes Class ------ ---------- ----- exon1: startV->vs constitutive (cons 0) junction1: vs->ve1 alternative1 (alt1 1) exon2: ve1->altBpEnd alternative1 (alt1 1) junction2: altBpEnd->ve2 alternative1 (alt1 1) exon3: ve2->endV constitutive (cons 0) junction3: vs->ve2 alternative2 (alt2 2) */ { struct altGraphX *agLoc = NULL; /* Local altGraphX. */ struct evidence *ev = NULL, *evLoc = NULL; int *vPos = ag->vPositions; unsigned char *vT = ag->vTypes; int *vPosLoc = NULL; /* Vertex Positions. */ int *eStartsLoc = NULL; /* Edge Starts. */ int *eEndsLoc = NULL; /* Edge ends. */ unsigned char *vTLoc = NULL; /* Vertex Types. */ int *eTLoc = NULL; /* Edge Types. */ int vCLoc = 0; int eCLoc = 0; int i =0; struct dyString *dy = NULL; if(out == NULL) return; AllocVar(agLoc); agLoc->tName = cloneString(ag->tName); agLoc->name = cloneString(ag->name); agLoc->tStart = vPos[startV]; agLoc->tEnd = vPos[endV]; agLoc->strand[0] = ag->strand[0]; agLoc->vertexCount = vCLoc = 6; agLoc->edgeCount = eCLoc = 6; agLoc->id = altCassette; /* Allocate some arrays. */ AllocArray(vPosLoc, vCLoc); AllocArray(eStartsLoc, vCLoc); AllocArray(eEndsLoc, vCLoc); AllocArray(vTLoc, vCLoc); AllocArray(eTLoc, vCLoc); /* Fill in the vertex positions. */ vPosLoc[0] = vPos[startV]; vPosLoc[1] = vPos[vs]; vPosLoc[2] = vPos[ve1]; vPosLoc[3] = vPos[altBpEnd]; vPosLoc[4] = vPos[ve2]; vPosLoc[5] = vPos[endV]; /* Fill in the vertex types. */ vTLoc[0] = vT[startV]; vTLoc[1] = vT[vs]; vTLoc[2] = vT[ve1]; vTLoc[3] = vT[altBpEnd]; vTLoc[4] = vT[ve2]; vTLoc[5] = vT[endV]; /* Fill in the edges. */ /* Constitutive first exon. 
*/ eStartsLoc[0] = 0; eEndsLoc[0] = 1; eTLoc[0] = 0; ev = evidenceForEdge(ag, startV, vs); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Exon inclusion junction. */ eStartsLoc[1] = 1; eEndsLoc[1] = 2; eTLoc[1] = 1; ev = evidenceForEdge(ag, vs, ve1); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Exon exclusion junction. */ eStartsLoc[2] = 1; eEndsLoc[2] = 4; eTLoc[2] = 2; ev = evidenceForEdge(ag, vs, ve2); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Cassette exon. */ eStartsLoc[3] = 2; eEndsLoc[3] = 3; eTLoc[3] = 1; ev = evidenceForEdge(ag, ve1, altBpEnd); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Exon inclusion junction. */ eStartsLoc[4] = 3; eEndsLoc[4] = 4; eTLoc[4] = 1; ev = evidenceForEdge(ag, altBpEnd, ve2); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Constitutive second exon. */ eStartsLoc[5] = 4; eEndsLoc[5] = 5; eTLoc[5] = 0; ev = evidenceForEdge(ag, ve2, endV); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); slReverse(&agLoc->evidence); dy = newDyString(ag->mrnaRefCount*36); agLoc->mrnaRefCount = ag->mrnaRefCount; for(i=0; i<ag->mrnaRefCount; i++) dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]); sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i); dyStringFree(&dy); agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount); agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount); agLoc->vPositions = vPosLoc; agLoc->edgeStarts = eStartsLoc; agLoc->edgeEnds = eEndsLoc; agLoc->vTypes = vTLoc; agLoc->edgeTypes = eTLoc; altGraphXTabOut(agLoc, out); altGraphXFree(&agLoc); }
void reportAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int altBpStart, int altBpEnd, int startV, int endV, FILE *out) /* Write out an altGraphX record for an alt3Prime splicing event. Variable names are consistent with the rest of the program, but can be misleading. Specifically vs = start of alt splicing, ve1 = first end of alt splicing, etc. even though "vs" is really the end of an exon. For an alt5Prime splice the edges are: Name Vertexes Class ------ ---------- ----- exon1: startV->vs constituative (0) junction1: vs->ve1 alternative (1) junction2: vs->ve2 alternative (2) exon2: ve1->e2 alternative (1) exon3: ve2->endV constituative (0) */ { struct altGraphX *agLoc = NULL; /* Local altGraphX. */ struct evidence *ev = NULL, *evLoc = NULL; int *vPos = ag->vPositions; unsigned char *vT = ag->vTypes; int *vPosLoc = NULL; /* Vertex Positions. */ int *eStartsLoc = NULL; /* Edge Starts. */ int *eEndsLoc = NULL; /* Edge ends. */ unsigned char *vTLoc = NULL; /* Vertex Types. */ int *eTLoc = NULL; /* Edge Types. */ int vCLoc = 0; int eCLoc = 0; int edgeIx = 0, vertexIx = 0; int i =0; struct dyString *dy = NULL; if(out == NULL) return; AllocVar(agLoc); agLoc->tName = cloneString(ag->tName); agLoc->name = cloneString(ag->name); agLoc->tStart = vPos[startV]; agLoc->tEnd = vPos[endV]; agLoc->strand[0] = ag->strand[0]; agLoc->vertexCount = vCLoc = 6; agLoc->edgeCount = eCLoc = 5; agLoc->id = alt3Prime; /* Allocate some arrays. */ AllocArray(vPosLoc, vCLoc); AllocArray(eStartsLoc, eCLoc); AllocArray(eEndsLoc, eCLoc); AllocArray(vTLoc, vCLoc); AllocArray(eTLoc, eCLoc); /* Fill in the vertex positions. */ vertexIx = 0; vPosLoc[vertexIx++] = vPos[startV]; /* 0 */ vPosLoc[vertexIx++] = vPos[vs]; /* 1 */ vPosLoc[vertexIx++] = vPos[ve1]; /* 2 */ vPosLoc[vertexIx++] = vPos[ve2]; /* 3 */ vPosLoc[vertexIx++] = vPos[ve2]; /* 4 */ vPosLoc[vertexIx++] = vPos[endV]; /* 5 */ /* Fill in the vertex types. 
*/ vertexIx = 0; vTLoc[vertexIx++] = vT[startV]; vTLoc[vertexIx++] = vT[vs]; vTLoc[vertexIx++] = vT[ve1]; vTLoc[vertexIx++] = vT[vs]; /* Faking a separate exon for the alt spliced portion. */ vTLoc[vertexIx++] = vT[ve2]; vTLoc[vertexIx++] = vT[endV]; edgeIx = 0; /* Constitutive first exon. */ eStartsLoc[edgeIx] = 0; eEndsLoc[edgeIx] = 1; eTLoc[edgeIx] = 0; ev = evidenceForEdge(ag, startV, vs); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Alternative1 junction (shorter). */ eStartsLoc[edgeIx] = 1; eEndsLoc[edgeIx] = 2; eTLoc[edgeIx] = 1; ev = evidenceForEdge(ag, vs, ve1); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Alt2 junction (longer). */ eStartsLoc[edgeIx] = 1; eEndsLoc[edgeIx] = 4; eTLoc[edgeIx] = 2; ev = evidenceForEdge(ag, vs, ve2); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Alt1 portion of second exon. */ eStartsLoc[edgeIx] = 2; eEndsLoc[edgeIx] = 3; eTLoc[edgeIx] = 1; ev = evidenceForEdge(ag, ve1, endV); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Exon 2 constitutive (shorter exon) */ eStartsLoc[edgeIx] = 4; eEndsLoc[edgeIx] = 5; eTLoc[edgeIx] = 0; ev = evidenceForEdge(ag, ve2, endV); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Package up the evidence, tissues, etc. 
*/ slReverse(&agLoc->evidence); dy = newDyString(ag->mrnaRefCount*36); agLoc->mrnaRefCount = ag->mrnaRefCount; for(i=0; i<ag->mrnaRefCount; i++) dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]); sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i); dyStringFree(&dy); agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount); agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount); agLoc->vPositions = vPosLoc; agLoc->edgeStarts = eStartsLoc; agLoc->edgeEnds = eEndsLoc; agLoc->vTypes = vTLoc; agLoc->edgeTypes = eTLoc; altGraphXTabOut(agLoc, out); altGraphXFree(&agLoc); }