void fillInGene(struct chain *chain, struct genePred *gene, struct genePred **pGene) /** Fill in syntenic gene structure with initial information for gene. */ { FILE *cdsErrorFp; struct genePred *synGene = NULL; int qs, qe; struct chain *subChain=NULL, *toFree=NULL; AllocVar(synGene); chainSubSetForRegion(chain, gene->txStart, gene->txEnd , &subChain, &toFree); if(subChain == NULL) { *pGene= NULL; return; } qChainRangePlusStrand(subChain, &qs, &qe); synGene->chrom = cloneString(subChain->qName); synGene->name = cloneString(gene->name); synGene->txStart = qs; synGene->txEnd = qe; AllocArray(synGene->exonStarts, gene->exonCount); AllocArray(synGene->exonEnds, gene->exonCount); if(chain->qStrand == '+') strncpy(synGene->strand, gene->strand, sizeof(synGene->strand)); else { if(gene->strand[0] == '+') strncpy(synGene->strand, "-", sizeof(synGene->strand)); else if(gene->strand[0] == '-') strncpy(synGene->strand, "+", sizeof(synGene->strand)); else errAbort("Don't recognize strand %s from gene %s", gene->strand, gene->name); } chainFree(&toFree); chainSubSetForRegion(chain, gene->cdsStart, gene->cdsEnd , &subChain, &toFree); if(subChain == NULL ) { if(optionExists("cdsErrorFile")) { cdsErrorFp = fopen( optionVal("cdsErrorFile",NULL), "a" ); fprintf( cdsErrorFp, "%s\t%s\t%u\t%u\t%u\t%u\t%s\t%d\n", gene->name, gene->chrom, gene->txStart, gene->txEnd, gene->cdsStart, gene->cdsEnd, gene->strand, gene->exonCount ); fclose(cdsErrorFp); } *pGene = NULL; genePredFree(&synGene); return; } qChainRangePlusStrand(subChain, &qs, &qe); synGene->cdsStart = qs; synGene->cdsEnd = qe; chainFree(&toFree); *pGene = synGene; }
void loadOrthoAgxList(struct txGraph *ag, struct indexedChain *ixc, struct hash *orthoGraphHash,
		      boolean *revRet, struct txGraph **orthoAgListRet)
/** Return the txGraph records in the orthologous position on the other
 * genome as defined by ag and chain.
 *
 * The target range ag->tStart..ag->tEnd is sub-setted out of ixc and its
 * query range is handed to agxForCoordinates() together with
 * orthoGraphHash.  The strand passed along is ag's strand, flipped when
 * the sub-chain's qStrand is '-'.
 *
 * *revRet is TRUE only when a sub-chain was found and it lies on the '-'
 * query strand.  *orthoAgListRet is whatever agxForCoordinates() returned,
 * or NULL when ixc is NULL or no sub-chain covers the range. */
{
struct txGraph *orthoList = NULL;
struct chain *sub = NULL, *subToFree = NULL;
boolean isRev = FALSE;

/* First find the piece of chain under the splicing graph. */
if (ixc != NULL)
    indexedChainSubsetOnT(ixc, ag->tStart, ag->tEnd, &sub, &subToFree);

if (sub != NULL)
    {
    int qStart = 0, qEnd = 0;
    char *queryStrand = ag->strand;

    qChainRangePlusStrand(sub, &qStart, &qEnd);
    isRev = (sub->qStrand == '-') ? TRUE : FALSE;
    if (isRev)
	queryStrand = (ag->strand[0] == '+') ? "-" : "+";
    orthoList = agxForCoordinates(sub->qName, qStart, qEnd, queryStrand[0], orthoGraphHash);
    chainFreeList(&subToFree);
    }

*revRet = isRev;
*orthoAgListRet = orthoList;
}
void addExonToGene(struct chain *chain, struct genePred *gene, struct genePred *synGene, int block)
/** Convert block in genePred to block in orthologous genome for synGene
 * using chain.
 *
 * Exon number 'block' of gene (exonStarts[block]..exonEnds[block]) is
 * mapped through chain.  If it maps, the query range is appended to
 * synGene's exon arrays at index synGene->exonCount and the count is
 * incremented.  If it does not map, synGene is left untouched. */
{
struct chain *subChain = NULL, *toFree = NULL;
int qs, qe;
int end = gene->exonEnds[block];

chainSubSetForRegion(chain, gene->exonStarts[block], end, &subChain, &toFree);
if (subChain == NULL)
    return;
qChainRangePlusStrand(subChain, &qs, &qe);
/* The start is stored as returned by qChainRangePlusStrand(); it is not
 * made relative to synGene->txStart. */
synGene->exonStarts[synGene->exonCount] = qs;
synGene->exonEnds[synGene->exonCount] = qe;
synGene->exonCount++;
chainFree(&toFree);
}
void addExonToBedFromBlock(struct chain *chain, struct bed *bed,
			   int blockStart, int blockEnd)
/** Convert block to block in orthologous genome for bed using chain.
 *
 * The range blockStart..blockEnd is mapped through chain.  When it maps,
 * a block is appended to bed at index bed->blockCount: its start is the
 * query start minus bed->chromStart and its size is the absolute length
 * of the query range.  When it does not map, bed is left unchanged. */
{
struct chain *mapped = NULL, *mappedToFree = NULL;
int qStart, qEnd;

chainSubSetForRegion(chain, blockStart, blockEnd, &mapped, &mappedToFree);
if (mapped != NULL)
    {
    qChainRangePlusStrand(mapped, &qStart, &qEnd);
    bed->chromStarts[bed->blockCount] = qStart - bed->chromStart;
    bed->blockSizes[bed->blockCount] = abs(qEnd - qStart);
    bed->blockCount++;
    chainFree(&mappedToFree);
    }
}
boolean edgeMap(int start, int end, struct indexedChain *ixc,
		int *retStart, int *retEnd, boolean *retRev,
		boolean *retStartExact, boolean *retEndExact, int *retCoverage)
/* Map edge through chain.  Return FALSE if no map.
 *
 * On success:
 *   retStart/retEnd   - range filled in by qChainRangePlusStrand()
 *   retRev            - TRUE when the sub-chain's qStrand is '-'
 *   retStartExact     - TRUE when start equals the sub-chain's tStart
 *   retEndExact       - TRUE when end equals the sub-chain's tEnd
 *   retCoverage       - chainBasesInBlocks() of the sub-chain
 * On failure none of the return parameters are written. */
{
struct chain *sub = NULL, *subToFree = NULL;

indexedChainSubsetOnT(ixc, start, end, &sub, &subToFree);
if (sub == NULL)
    return FALSE;

qChainRangePlusStrand(sub, retStart, retEnd);
*retStartExact = (start == sub->tStart) ? TRUE : FALSE;
*retEndExact = (end == sub->tEnd) ? TRUE : FALSE;
*retRev = (sub->qStrand == '-') ? TRUE : FALSE;
*retCoverage = chainBasesInBlocks(sub);
chainFree(&subToFree);
return TRUE;
}
struct altGraphX *mapAltGraphX(struct altGraphX *ag, struct sqlConnection *conn, char *db, char *netTable ) /* Map one altGraphX record. Return NULL if can't find. This function is getting a bit long but it isn't easy to do...*/ { struct altGraphX *agNew = NULL; struct chain *chain = NULL; struct chain *workingChain = NULL, *workingChainFree = NULL; struct chain *subChain = NULL, *toFree = NULL; int i,j,k; int edgeCountNew =0; int vCountNew=0; bool reverse = FALSE; int *starts = NULL, *sizes = NULL; int blockCount =0; /* Find the best chain (one that overlaps the most exons. */ AllocArray(starts, ag->edgeCount); AllocArray(sizes, ag->edgeCount); for(i=0; i<ag->edgeCount; i++) { if(getSpliceEdgeType(ag, i) == ggExon) { starts[blockCount] = ag->vPositions[ag->edgeStarts[i]]; sizes[blockCount] = ag->vPositions[ag->edgeEnds[i]] - ag->vPositions[ag->edgeStarts[i]]; blockCount++; } } chain = chainForBlocks(conn, db, netTable, ag->tName, ag->tStart, ag->tEnd, starts, sizes, blockCount); freez(&starts); freez(&sizes); if(chain == NULL) return NULL; /* Make a smaller chain to work on... */ chainSubSetForRegion(chain, ag->tStart-1, ag->tEnd+1, &workingChain, &workingChainFree); if(workingChain == NULL) return NULL; if (chain->qStrand == '-') reverse = TRUE; agNew = altGraphXClone(ag); freez(&agNew->tName); agNew->tName = cloneString(chain->qName); /* Map vertex positions using chain. */ for(i = 0; i < agNew->vertexCount; i++) { struct cBlock *bi = NULL; int targetPos = agNew->vPositions[i]; struct chain *subChain=NULL, *toFree=NULL; agNew->vPositions[i] = -1; chainSubSetForRegion(workingChain, targetPos , targetPos, &subChain, &toFree); if(subChain != NULL) { int qs, qe; qChainRangePlusStrand(subChain, &qs, &qe); agNew->vPositions[i] = qs; } chainFree(&toFree); } /* Prune out edges not found. */ /* Set up to remember how many edges we have and our start and stop. 
*/ edgeCountNew = agNew->edgeCount; vCountNew = agNew->vertexCount; agNew->tStart = BIGNUM; agNew->tEnd = 0; for(i=0; i<agNew->vertexCount && i>= 0; i++) { struct evidence *ev = NULL; if(agNew->vPositions[i] == -1) { /* Adjust positions, overwriting one that isn't found. */ vCountNew--; for(j=i; j<agNew->vertexCount-1; j++) { agNew->vPositions[j] = agNew->vPositions[j+1]; agNew->vTypes[j] = agNew->vTypes[j+1]; } /* Remove edges associated with this vertex. */ for(j=0; j<agNew->edgeCount && j>=0; j++) { if(agNew->edgeStarts[j] == i || agNew->edgeEnds[j] == i) { edgeCountNew--; /* Remove evidence. */ ev = slElementFromIx(agNew->evidence, j); slRemoveEl(&agNew->evidence, ev); for(k=j; k<agNew->edgeCount -1; k++) { agNew->edgeStarts[k] = agNew->edgeStarts[k+1]; agNew->edgeEnds[k] = agNew->edgeEnds[k+1]; agNew->edgeTypes[k] = agNew->edgeTypes[k+1]; } j--; agNew->edgeCount--; } } /* Subtract off one vertex from all the others. */ for(j=0; j<agNew->edgeCount; j++) { if(agNew->edgeStarts[j] > i) agNew->edgeStarts[j]--; if(agNew->edgeEnds[j] > i) agNew->edgeEnds[j]--; } i--; agNew->vertexCount--; } /* Else if vertex found set agNew start and ends. */ else { agNew->tStart = min(agNew->vPositions[i], agNew->tStart); agNew->tEnd = max(agNew->vPositions[i], agNew->tEnd); } } /* Not going to worry about mRNAs that aren't used anymore. Leave them in for now. */ agNew->vertexCount = vCountNew; agNew->edgeCount = edgeCountNew; if(agNew->vertexCount == 0 || agNew->edgeCount == 0) { altGraphXFree(&agNew); return NULL; } for(i=0; i<agNew->edgeCount; i++) { if(agNew->edgeStarts[i] >= agNew->vertexCount || agNew->edgeEnds[i] >= agNew->vertexCount) { warn("For %s vertexes occur at %d when in reality there are only %d vertices.", agNew->name, max(agNew->edgeStarts[i], agNew->edgeEnds[i]), agNew->vertexCount); } } /* If it is on the other strand reverse it. */ if(reverse) { altGraphXReverse(agNew); } chainFree(&workingChainFree); return agNew; }