Ejemplo n.º 1
0
void fillInGene(struct chain *chain, struct genePred *gene, struct genePred **pGene)
/** Fill in syntenic gene structure with initial information for gene.
 *
 * The transcript range and then the CDS range of gene are each mapped
 * through chain with chainSubSetForRegion()/qChainRangePlusStrand().
 *
 * On success *pGene receives a newly allocated genePred with chrom (the
 * chain's qName), name, strand, txStart/txEnd and cdsStart/cdsEnd set, and
 * with exonStarts/exonEnds arrays allocated for gene->exonCount entries.
 * The exon arrays are not filled in and exonCount is not assigned here.
 *
 * If either range has no sub-chain, *pGene is set to NULL.  When it is the
 * CDS range that fails and the "cdsErrorFile" option exists, one
 * tab-separated line describing gene is appended to that file.
 *
 * Calls errAbort() if the gene strand is neither '+' nor '-' on a
 * minus-strand chain, or if the cdsErrorFile cannot be opened. */
{
struct genePred *synGene = NULL;
struct chain *subChain = NULL, *toFree = NULL;
int qs, qe;

/* Map the transcript range before allocating anything, so that the
 * "no mapping" exit has nothing to clean up. */
chainSubSetForRegion(chain, gene->txStart, gene->txEnd, &subChain, &toFree);
if (subChain == NULL)
    {
    *pGene = NULL;
    return;
    }

AllocVar(synGene);
qChainRangePlusStrand(subChain, &qs, &qe);
synGene->chrom = cloneString(subChain->qName);
synGene->name = cloneString(gene->name);
synGene->txStart = qs;
synGene->txEnd = qe;
AllocArray(synGene->exonStarts, gene->exonCount);
AllocArray(synGene->exonEnds, gene->exonCount);

/* A '+' chain keeps the gene's strand; any other chain strand flips it. */
if (chain->qStrand == '+')
    strncpy(synGene->strand, gene->strand, sizeof(synGene->strand));
else
    {
    if (gene->strand[0] == '+')
        strncpy(synGene->strand, "-", sizeof(synGene->strand));
    else if (gene->strand[0] == '-')
        strncpy(synGene->strand, "+", sizeof(synGene->strand));
    else
        errAbort("Don't recognize strand %s from gene %s", gene->strand, gene->name);
    }
chainFree(&toFree);

/* Now map the coding region. */
chainSubSetForRegion(chain, gene->cdsStart, gene->cdsEnd, &subChain, &toFree);
if (subChain == NULL)
    {
    if (optionExists("cdsErrorFile"))
        {
        FILE *cdsErrorFp = fopen(optionVal("cdsErrorFile", NULL), "a");
        /* Fail with a message rather than handing a NULL stream to fprintf. */
        if (cdsErrorFp == NULL)
            errAbort("Couldn't open cdsErrorFile %s for appending",
                     optionVal("cdsErrorFile", NULL));
        fprintf(cdsErrorFp, "%s\t%s\t%u\t%u\t%u\t%u\t%s\t%d\n", gene->name, gene->chrom,
                gene->txStart, gene->txEnd, gene->cdsStart, gene->cdsEnd,
                gene->strand, gene->exonCount);
        fclose(cdsErrorFp);
        }
    *pGene = NULL;
    genePredFree(&synGene);
    return;
    }
qChainRangePlusStrand(subChain, &qs, &qe);
synGene->cdsStart = qs;
synGene->cdsEnd = qe;
chainFree(&toFree);
*pGene = synGene;
}
Ejemplo n.º 2
0
void loadOrthoAgxList(struct txGraph *ag, struct indexedChain *ixc, struct hash *orthoGraphHash,
				   boolean *revRet, struct txGraph **orthoAgListRet)
/** Look up the txGraph records at the orthologous position of ag on the
 *  other genome, using the indexed chain ixc to do the coordinate mapping.
 *
 *  *revRet is set to TRUE when a sub-chain was found and its query strand
 *  is '-', otherwise FALSE.  *orthoAgListRet is set to whatever
 *  agxForCoordinates() returns for the mapped range, or NULL when ixc is
 *  NULL or no sub-chain covers ag. */
{
boolean isReversed = FALSE;
struct txGraph *found = NULL;

if (ixc != NULL)
    {
    struct chain *mapped = NULL, *owned = NULL;

    /* Locate the part of the chain covering ag on the target side. */
    indexedChainSubsetOnT(ixc, ag->tStart, ag->tEnd, &mapped, &owned);
    if (mapped != NULL)
        {
        int qStart = 0, qEnd = 0;
        char strandChar;

        qChainRangePlusStrand(mapped, &qStart, &qEnd);
        if (mapped->qStrand == '-')
            isReversed = TRUE;

        /* On a reversed chain '+' becomes '-' and anything else becomes '+'. */
        if (!isReversed)
            strandChar = ag->strand[0];
        else if (ag->strand[0] == '+')
            strandChar = '-';
        else
            strandChar = '+';

        found = agxForCoordinates(mapped->qName, qStart, qEnd, strandChar, orthoGraphHash);
        chainFreeList(&owned);
        }
    }

*revRet = isReversed;
*orthoAgListRet = found;
}
Ejemplo n.º 3
0
void addExonToGene(struct chain *chain, struct genePred *gene, struct genePred *synGene, int block)
/** Map exon number 'block' of gene through chain and append the resulting
 *  query-side range to synGene's exon arrays, bumping synGene->exonCount.
 *  Does nothing when the exon has no sub-chain. */
{
struct chain *mapped = NULL, *owned = NULL;
int qStart, qEnd;
int exonEnd = gene->exonEnds[block];

chainSubSetForRegion(chain, gene->exonStarts[block], exonEnd, &mapped, &owned);
if (mapped != NULL)
    {
    qChainRangePlusStrand(mapped, &qStart, &qEnd);
    /* Append at the current end of the exon list. */
    synGene->exonStarts[synGene->exonCount] = qStart;
    synGene->exonEnds[synGene->exonCount] = qEnd;
    synGene->exonCount += 1;
    chainFree(&owned);
    }
}
Ejemplo n.º 4
0
void addExonToBedFromBlock(struct chain *chain, struct bed *bed, int blockStart, int blockEnd)
/** Map the range blockStart..blockEnd through chain and append it to bed as
 *  a new block: the start is stored relative to bed->chromStart and the size
 *  as the absolute difference of the mapped ends.  Does nothing when the
 *  range has no sub-chain. */
{
struct chain *mapped = NULL, *owned = NULL;
int qStart, qEnd;

chainSubSetForRegion(chain, blockStart, blockEnd, &mapped, &owned);
if (mapped != NULL)
    {
    qChainRangePlusStrand(mapped, &qStart, &qEnd);
    bed->chromStarts[bed->blockCount] = qStart - bed->chromStart;
    bed->blockSizes[bed->blockCount] = abs(qEnd - qStart);
    bed->blockCount += 1;
    chainFree(&owned);
    }
}
Ejemplo n.º 5
0
boolean edgeMap(int start, int end, struct indexedChain *ixc,
	int *retStart, int *retEnd, boolean *retRev,
	boolean *retStartExact, boolean *retEndExact,
	int *retCoverage)
/* Map the edge start..end through the indexed chain.
 * Returns FALSE, leaving every output untouched, when no sub-chain covers
 * the edge.  Otherwise returns TRUE after setting:
 *   retStart/retEnd  - range reported by qChainRangePlusStrand()
 *   retRev           - TRUE if the sub-chain's query strand is '-'
 *   retStartExact    - TRUE if start equals the sub-chain's tStart
 *   retEndExact      - TRUE if end equals the sub-chain's tEnd
 *   retCoverage      - chainBasesInBlocks() of the sub-chain */
{
struct chain *mapped = NULL, *owned = NULL;

indexedChainSubsetOnT(ixc, start, end, &mapped, &owned);
if (mapped == NULL)
    return FALSE;

qChainRangePlusStrand(mapped, retStart, retEnd);
*retStartExact = (start == mapped->tStart) ? TRUE : FALSE;
*retEndExact = (end == mapped->tEnd) ? TRUE : FALSE;
*retRev = (mapped->qStrand == '-') ? TRUE : FALSE;
*retCoverage = chainBasesInBlocks(mapped);

chainFree(&owned);
return TRUE;
}
Ejemplo n.º 6
0
struct altGraphX *mapAltGraphX(struct altGraphX *ag, struct sqlConnection *conn,
			       char *db, char *netTable )
/* Map one altGraphX record. Return NULL if can't find. This function
 is getting a bit long but it isn't easy to do...*/
{
struct altGraphX *agNew = NULL;
struct chain *chain = NULL;
struct chain *workingChain = NULL, *workingChainFree = NULL;
struct chain *subChain = NULL, *toFree = NULL;
int i,j,k;
int edgeCountNew =0;
int vCountNew=0;
bool reverse = FALSE;
int *starts = NULL, *sizes = NULL;
int blockCount =0;

/* Find the best chain (one that overlaps the most exons. */
AllocArray(starts, ag->edgeCount);
AllocArray(sizes, ag->edgeCount);
for(i=0; i<ag->edgeCount; i++)
    {
    if(getSpliceEdgeType(ag, i) == ggExon)
	{
	starts[blockCount] = ag->vPositions[ag->edgeStarts[i]];
	sizes[blockCount] = ag->vPositions[ag->edgeEnds[i]] - ag->vPositions[ag->edgeStarts[i]];
	blockCount++;
	}
    }
chain = chainForBlocks(conn, db, netTable, ag->tName, ag->tStart, ag->tEnd,
		       starts, sizes, blockCount);
freez(&starts);
freez(&sizes);

if(chain == NULL)
    return NULL;
/* Make a smaller chain to work on... */
chainSubSetForRegion(chain, ag->tStart-1, ag->tEnd+1, &workingChain, &workingChainFree);
if(workingChain == NULL)
    return NULL;
if (chain->qStrand == '-')
    reverse = TRUE;
agNew = altGraphXClone(ag);
freez(&agNew->tName);
agNew->tName = cloneString(chain->qName);
/* Map vertex positions using chain. */
for(i = 0; i < agNew->vertexCount; i++)
    {
    struct cBlock *bi = NULL;
    int targetPos = agNew->vPositions[i];
    struct chain *subChain=NULL, *toFree=NULL;
    agNew->vPositions[i] = -1;
    chainSubSetForRegion(workingChain, targetPos , targetPos, &subChain, &toFree);    
    if(subChain != NULL)
	{
	int qs, qe;
	qChainRangePlusStrand(subChain, &qs, &qe);
	agNew->vPositions[i] = qs;
	}
    chainFree(&toFree);
    }
/* Prune out edges not found. */

/* Set up to remember how many edges we have and our start and stop. */
edgeCountNew = agNew->edgeCount;
vCountNew = agNew->vertexCount;
agNew->tStart = BIGNUM;
agNew->tEnd = 0;
for(i=0; i<agNew->vertexCount && i>= 0; i++)
    {
    struct evidence *ev = NULL;
    if(agNew->vPositions[i] == -1)
	{
	/* Adjust positions, overwriting one that isn't found. */
	vCountNew--;
	for(j=i; j<agNew->vertexCount-1; j++)
	    {
	    agNew->vPositions[j] = agNew->vPositions[j+1];
	    agNew->vTypes[j] = agNew->vTypes[j+1];
	    }
	/* Remove edges associated with this vertex. */
	for(j=0; j<agNew->edgeCount && j>=0; j++)
	    {
	    if(agNew->edgeStarts[j] == i || agNew->edgeEnds[j] == i)
		{
		edgeCountNew--;
		/* Remove evidence. */
		ev = slElementFromIx(agNew->evidence, j);
		slRemoveEl(&agNew->evidence, ev);
		for(k=j; k<agNew->edgeCount -1; k++)
		    {
		    agNew->edgeStarts[k] = agNew->edgeStarts[k+1];
		    agNew->edgeEnds[k] = agNew->edgeEnds[k+1];
		    agNew->edgeTypes[k] = agNew->edgeTypes[k+1];
		    }
		j--;
		agNew->edgeCount--;
		}
	    }
	/* Subtract off one vertex from all the others. */
	for(j=0; j<agNew->edgeCount; j++)
	    {
	    if(agNew->edgeStarts[j] > i)
		agNew->edgeStarts[j]--; 
	    if(agNew->edgeEnds[j] > i)
		agNew->edgeEnds[j]--; 
	    }
	i--;
	agNew->vertexCount--;
	}
    /* Else if vertex found set agNew start and ends. */
    else
	{
	agNew->tStart = min(agNew->vPositions[i], agNew->tStart);
	agNew->tEnd = max(agNew->vPositions[i], agNew->tEnd);
	}
    }
/* Not going to worry about mRNAs that aren't used anymore. Leave them in
   for now. */
agNew->vertexCount = vCountNew;
agNew->edgeCount = edgeCountNew;
if(agNew->vertexCount == 0 || agNew->edgeCount == 0)
    {
    altGraphXFree(&agNew);
    return NULL;
    }
for(i=0; i<agNew->edgeCount; i++)
    {
    if(agNew->edgeStarts[i] >= agNew->vertexCount ||
       agNew->edgeEnds[i] >= agNew->vertexCount)
	{
	warn("For %s vertexes occur at %d when in reality there are only %d vertices.",
	     agNew->name, max(agNew->edgeStarts[i], agNew->edgeEnds[i]), agNew->vertexCount);
	}
    }
/* If it is on the other strand reverse it. */
if(reverse)
    {
    altGraphXReverse(agNew);
    }
chainFree(&workingChainFree);
return agNew;
}