void mapAltGraphXFile(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, char *netTable, char *altGraphXFileName, char *altGraphXTableName, FILE *agxOut, FILE *selectedOut, int *foundCount, int *notFoundCount) /* Map over altGraphX Structures from one organism to another. Basically create a mapping for the vertices and then reverse them if on '-' strand.*/ { int count =0; struct bed *bed = NULL; struct altGraphX *agList = NULL, *ag = NULL, *agNew = NULL; if(altGraphXFileName != NULL) { warn("Loading altGraphX Records from file %s.", altGraphXFileName); agList = altGraphXLoadAll(altGraphXFileName); } else if(altGraphXTableName != NULL) { char query[256]; warn("Reading altGraphX Records from table %s.", altGraphXTableName); sqlSafef(query, sizeof(query), "select * from %s where tName like '%s'", altGraphXTableName, chrom); agList = altGraphXLoadByQuery(conn, query); } else errAbort("orthoMap::mapAlGraphXFile() - Need a table name or file name to load altGraphX records"); warn("Mapping altGraphX records."); for(ag = agList; ag != NULL; ag = ag->next) { if(differentString(ag->tName, chrom)) continue; occassionalDot(); agNew = mapAltGraphX(ag, conn, db, netTable); if(agNew == NULL) (*notFoundCount)++; else { (*foundCount)++; altGraphXTabOut(agNew, agxOut); altGraphXFree(&agNew); if (selectedOut != NULL) altGraphXTabOut(ag, selectedOut); } count++; } }
void txgToAgx(char *inTxg, char *outAgx)
/* txgToAgx - Convert from txg (txGraph) format to agx (altGraphX).
 * Reads inTxg one row at a time, converts each txGraph record with
 * txGraphToAltGraphX() and writes the result as a tab-separated line
 * to outAgx.  Both files are closed before returning. */
{
struct lineFile *lf = lineFileOpen(inTxg, TRUE);
char *row[TXGRAPH_NUM_COLS];
FILE *f = mustOpen(outAgx, "w");

while (lineFileRow(lf, row))
    {
    struct txGraph *txg = txGraphLoad(row);
    verbose(2, "loaded txGraph %s\n", txg->name);
    struct altGraphX *agx = txGraphToAltGraphX(txg);
    altGraphXTabOut(agx, f);
    /* Each record is independent, so free it right away. */
    altGraphXFree(&agx);
    txGraphFree(&txg);
    }
carefulClose(&f);
/* The input lineFile was previously left open. */
lineFileClose(&lf);
}
struct altGraphX *mapAltGraphX(struct altGraphX *ag, struct sqlConnection *conn, char *db, char *netTable ) /* Map one altGraphX record. Return NULL if can't find. This function is getting a bit long but it isn't easy to do...*/ { struct altGraphX *agNew = NULL; struct chain *chain = NULL; struct chain *workingChain = NULL, *workingChainFree = NULL; struct chain *subChain = NULL, *toFree = NULL; int i,j,k; int edgeCountNew =0; int vCountNew=0; bool reverse = FALSE; int *starts = NULL, *sizes = NULL; int blockCount =0; /* Find the best chain (one that overlaps the most exons. */ AllocArray(starts, ag->edgeCount); AllocArray(sizes, ag->edgeCount); for(i=0; i<ag->edgeCount; i++) { if(getSpliceEdgeType(ag, i) == ggExon) { starts[blockCount] = ag->vPositions[ag->edgeStarts[i]]; sizes[blockCount] = ag->vPositions[ag->edgeEnds[i]] - ag->vPositions[ag->edgeStarts[i]]; blockCount++; } } chain = chainForBlocks(conn, db, netTable, ag->tName, ag->tStart, ag->tEnd, starts, sizes, blockCount); freez(&starts); freez(&sizes); if(chain == NULL) return NULL; /* Make a smaller chain to work on... */ chainSubSetForRegion(chain, ag->tStart-1, ag->tEnd+1, &workingChain, &workingChainFree); if(workingChain == NULL) return NULL; if (chain->qStrand == '-') reverse = TRUE; agNew = altGraphXClone(ag); freez(&agNew->tName); agNew->tName = cloneString(chain->qName); /* Map vertex positions using chain. */ for(i = 0; i < agNew->vertexCount; i++) { struct cBlock *bi = NULL; int targetPos = agNew->vPositions[i]; struct chain *subChain=NULL, *toFree=NULL; agNew->vPositions[i] = -1; chainSubSetForRegion(workingChain, targetPos , targetPos, &subChain, &toFree); if(subChain != NULL) { int qs, qe; qChainRangePlusStrand(subChain, &qs, &qe); agNew->vPositions[i] = qs; } chainFree(&toFree); } /* Prune out edges not found. */ /* Set up to remember how many edges we have and our start and stop. 
*/ edgeCountNew = agNew->edgeCount; vCountNew = agNew->vertexCount; agNew->tStart = BIGNUM; agNew->tEnd = 0; for(i=0; i<agNew->vertexCount && i>= 0; i++) { struct evidence *ev = NULL; if(agNew->vPositions[i] == -1) { /* Adjust positions, overwriting one that isn't found. */ vCountNew--; for(j=i; j<agNew->vertexCount-1; j++) { agNew->vPositions[j] = agNew->vPositions[j+1]; agNew->vTypes[j] = agNew->vTypes[j+1]; } /* Remove edges associated with this vertex. */ for(j=0; j<agNew->edgeCount && j>=0; j++) { if(agNew->edgeStarts[j] == i || agNew->edgeEnds[j] == i) { edgeCountNew--; /* Remove evidence. */ ev = slElementFromIx(agNew->evidence, j); slRemoveEl(&agNew->evidence, ev); for(k=j; k<agNew->edgeCount -1; k++) { agNew->edgeStarts[k] = agNew->edgeStarts[k+1]; agNew->edgeEnds[k] = agNew->edgeEnds[k+1]; agNew->edgeTypes[k] = agNew->edgeTypes[k+1]; } j--; agNew->edgeCount--; } } /* Subtract off one vertex from all the others. */ for(j=0; j<agNew->edgeCount; j++) { if(agNew->edgeStarts[j] > i) agNew->edgeStarts[j]--; if(agNew->edgeEnds[j] > i) agNew->edgeEnds[j]--; } i--; agNew->vertexCount--; } /* Else if vertex found set agNew start and ends. */ else { agNew->tStart = min(agNew->vPositions[i], agNew->tStart); agNew->tEnd = max(agNew->vPositions[i], agNew->tEnd); } } /* Not going to worry about mRNAs that aren't used anymore. Leave them in for now. */ agNew->vertexCount = vCountNew; agNew->edgeCount = edgeCountNew; if(agNew->vertexCount == 0 || agNew->edgeCount == 0) { altGraphXFree(&agNew); return NULL; } for(i=0; i<agNew->edgeCount; i++) { if(agNew->edgeStarts[i] >= agNew->vertexCount || agNew->edgeEnds[i] >= agNew->vertexCount) { warn("For %s vertexes occur at %d when in reality there are only %d vertices.", agNew->name, max(agNew->edgeStarts[i], agNew->edgeEnds[i]), agNew->vertexCount); } } /* If it is on the other strand reverse it. */ if(reverse) { altGraphXReverse(agNew); } chainFree(&workingChainFree); return agNew; }
void reportCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int altBpStart, int altBpEnd, int startV, int endV, FILE *out) /* Write out both an altGraphX and two bed files. For a cassette exon the edges are - Name Vertexes Class ------ ---------- ----- exon1: startV->vs constitutive (cons 0) junction1: vs->ve1 alternative1 (alt1 1) exon2: ve1->altBpEnd alternative1 (alt1 1) junction2: altBpEnd->ve2 alternative1 (alt1 1) exon3: ve2->endV constitutive (cons 0) junction3: vs->ve2 alternative2 (alt2 2) */ { struct altGraphX *agLoc = NULL; /* Local altGraphX. */ struct evidence *ev = NULL, *evLoc = NULL; int *vPos = ag->vPositions; unsigned char *vT = ag->vTypes; int *vPosLoc = NULL; /* Vertex Positions. */ int *eStartsLoc = NULL; /* Edge Starts. */ int *eEndsLoc = NULL; /* Edge ends. */ unsigned char *vTLoc = NULL; /* Vertex Types. */ int *eTLoc = NULL; /* Edge Types. */ int vCLoc = 0; int eCLoc = 0; int i =0; struct dyString *dy = NULL; if(out == NULL) return; AllocVar(agLoc); agLoc->tName = cloneString(ag->tName); agLoc->name = cloneString(ag->name); agLoc->tStart = vPos[startV]; agLoc->tEnd = vPos[endV]; agLoc->strand[0] = ag->strand[0]; agLoc->vertexCount = vCLoc = 6; agLoc->edgeCount = eCLoc = 6; agLoc->id = altCassette; /* Allocate some arrays. */ AllocArray(vPosLoc, vCLoc); AllocArray(eStartsLoc, vCLoc); AllocArray(eEndsLoc, vCLoc); AllocArray(vTLoc, vCLoc); AllocArray(eTLoc, vCLoc); /* Fill in the vertex positions. */ vPosLoc[0] = vPos[startV]; vPosLoc[1] = vPos[vs]; vPosLoc[2] = vPos[ve1]; vPosLoc[3] = vPos[altBpEnd]; vPosLoc[4] = vPos[ve2]; vPosLoc[5] = vPos[endV]; /* Fill in the vertex types. */ vTLoc[0] = vT[startV]; vTLoc[1] = vT[vs]; vTLoc[2] = vT[ve1]; vTLoc[3] = vT[altBpEnd]; vTLoc[4] = vT[ve2]; vTLoc[5] = vT[endV]; /* Fill in the edges. */ /* Constitutive first exon. 
*/ eStartsLoc[0] = 0; eEndsLoc[0] = 1; eTLoc[0] = 0; ev = evidenceForEdge(ag, startV, vs); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Exon inclusion junction. */ eStartsLoc[1] = 1; eEndsLoc[1] = 2; eTLoc[1] = 1; ev = evidenceForEdge(ag, vs, ve1); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Exon exclusion junction. */ eStartsLoc[2] = 1; eEndsLoc[2] = 4; eTLoc[2] = 2; ev = evidenceForEdge(ag, vs, ve2); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Cassette exon. */ eStartsLoc[3] = 2; eEndsLoc[3] = 3; eTLoc[3] = 1; ev = evidenceForEdge(ag, ve1, altBpEnd); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Exon inclusion junction. */ eStartsLoc[4] = 3; eEndsLoc[4] = 4; eTLoc[4] = 1; ev = evidenceForEdge(ag, altBpEnd, ve2); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); /* Constitutive second exon. */ eStartsLoc[5] = 4; eEndsLoc[5] = 5; eTLoc[5] = 0; ev = evidenceForEdge(ag, ve2, endV); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); slReverse(&agLoc->evidence); dy = newDyString(ag->mrnaRefCount*36); agLoc->mrnaRefCount = ag->mrnaRefCount; for(i=0; i<ag->mrnaRefCount; i++) dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]); sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i); dyStringFree(&dy); agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount); agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount); agLoc->vPositions = vPosLoc; agLoc->edgeStarts = eStartsLoc; agLoc->edgeEnds = eEndsLoc; agLoc->vTypes = vTLoc; agLoc->edgeTypes = eTLoc; altGraphXTabOut(agLoc, out); altGraphXFree(&agLoc); }
void reportAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int altBpStart, int altBpEnd, int startV, int endV, FILE *out) /* Write out an altGraphX record for an alt3Prime splicing event. Variable names are consistent with the rest of the program, but can be misleading. Specifically vs = start of alt splicing, ve1 = first end of alt splicing, etc. even though "vs" is really the end of an exon. For an alt5Prime splice the edges are: Name Vertexes Class ------ ---------- ----- exon1: startV->vs constituative (0) junction1: vs->ve1 alternative (1) junction2: vs->ve2 alternative (2) exon2: ve1->e2 alternative (1) exon3: ve2->endV constituative (0) */ { struct altGraphX *agLoc = NULL; /* Local altGraphX. */ struct evidence *ev = NULL, *evLoc = NULL; int *vPos = ag->vPositions; unsigned char *vT = ag->vTypes; int *vPosLoc = NULL; /* Vertex Positions. */ int *eStartsLoc = NULL; /* Edge Starts. */ int *eEndsLoc = NULL; /* Edge ends. */ unsigned char *vTLoc = NULL; /* Vertex Types. */ int *eTLoc = NULL; /* Edge Types. */ int vCLoc = 0; int eCLoc = 0; int edgeIx = 0, vertexIx = 0; int i =0; struct dyString *dy = NULL; if(out == NULL) return; AllocVar(agLoc); agLoc->tName = cloneString(ag->tName); agLoc->name = cloneString(ag->name); agLoc->tStart = vPos[startV]; agLoc->tEnd = vPos[endV]; agLoc->strand[0] = ag->strand[0]; agLoc->vertexCount = vCLoc = 6; agLoc->edgeCount = eCLoc = 5; agLoc->id = alt3Prime; /* Allocate some arrays. */ AllocArray(vPosLoc, vCLoc); AllocArray(eStartsLoc, eCLoc); AllocArray(eEndsLoc, eCLoc); AllocArray(vTLoc, vCLoc); AllocArray(eTLoc, eCLoc); /* Fill in the vertex positions. */ vertexIx = 0; vPosLoc[vertexIx++] = vPos[startV]; /* 0 */ vPosLoc[vertexIx++] = vPos[vs]; /* 1 */ vPosLoc[vertexIx++] = vPos[ve1]; /* 2 */ vPosLoc[vertexIx++] = vPos[ve2]; /* 3 */ vPosLoc[vertexIx++] = vPos[ve2]; /* 4 */ vPosLoc[vertexIx++] = vPos[endV]; /* 5 */ /* Fill in the vertex types. 
*/ vertexIx = 0; vTLoc[vertexIx++] = vT[startV]; vTLoc[vertexIx++] = vT[vs]; vTLoc[vertexIx++] = vT[ve1]; vTLoc[vertexIx++] = vT[vs]; /* Faking a separate exon for the alt spliced portion. */ vTLoc[vertexIx++] = vT[ve2]; vTLoc[vertexIx++] = vT[endV]; edgeIx = 0; /* Constitutive first exon. */ eStartsLoc[edgeIx] = 0; eEndsLoc[edgeIx] = 1; eTLoc[edgeIx] = 0; ev = evidenceForEdge(ag, startV, vs); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Alternative1 junction (shorter). */ eStartsLoc[edgeIx] = 1; eEndsLoc[edgeIx] = 2; eTLoc[edgeIx] = 1; ev = evidenceForEdge(ag, vs, ve1); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Alt2 junction (longer). */ eStartsLoc[edgeIx] = 1; eEndsLoc[edgeIx] = 4; eTLoc[edgeIx] = 2; ev = evidenceForEdge(ag, vs, ve2); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Alt1 portion of second exon. */ eStartsLoc[edgeIx] = 2; eEndsLoc[edgeIx] = 3; eTLoc[edgeIx] = 1; ev = evidenceForEdge(ag, ve1, endV); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Exon 2 constitutive (shorter exon) */ eStartsLoc[edgeIx] = 4; eEndsLoc[edgeIx] = 5; eTLoc[edgeIx] = 0; ev = evidenceForEdge(ag, ve2, endV); evLoc = CloneVar(ev); evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agLoc->evidence, evLoc); edgeIx++; /* Package up the evidence, tissues, etc. 
*/ slReverse(&agLoc->evidence); dy = newDyString(ag->mrnaRefCount*36); agLoc->mrnaRefCount = ag->mrnaRefCount; for(i=0; i<ag->mrnaRefCount; i++) dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]); sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i); dyStringFree(&dy); agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount); agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount); agLoc->vPositions = vPosLoc; agLoc->edgeStarts = eStartsLoc; agLoc->edgeEnds = eEndsLoc; agLoc->vTypes = vTLoc; agLoc->edgeTypes = eTLoc; altGraphXTabOut(agLoc, out); altGraphXFree(&agLoc); }