Esempio n. 1
0
void loadOrthoAgxList(struct txGraph *ag, struct indexedChain *ixc, struct hash *orthoGraphHash,
				   boolean *revRet, struct txGraph **orthoAgListRet)
/** Look up the txGraph records at the orthologous position on the other
    genome, as defined by ag and the chain index ixc.
    The part of the chain covering ag->tStart..ag->tEnd is extracted, its
    query range is computed, and agxForCoordinates() is asked for the graphs
    in that range. When the sub-chain's query strand is '-', the strand
    passed to the lookup is the opposite of ag's strand and *revRet is set
    to TRUE. *orthoAgListRet is NULL (and *revRet FALSE) when ixc is NULL or
    no sub-chain is found. */
{
struct txGraph *orthoList = NULL;
boolean isRev = FALSE;

if (ixc != NULL)
    {
    struct chain *sub = NULL, *chainToFree = NULL;

    /* First find the orthologous splicing graph. */
    indexedChainSubsetOnT(ixc, ag->tStart, ag->tEnd, &sub, &chainToFree);
    if (sub != NULL)
        {
        int qStart = 0, qEnd = 0;
        char orthoStrand;

        qChainRangePlusStrand(sub, &qStart, &qEnd);
        if (sub->qStrand == '-')
            isRev = TRUE;

        /* A reversed chain flips the strand we look for on the query side. */
        if (!isRev)
            orthoStrand = ag->strand[0];
        else if (ag->strand[0] == '+')
            orthoStrand = '-';
        else
            orthoStrand = '+';

        orthoList = agxForCoordinates(sub->qName, qStart, qEnd, orthoStrand, orthoGraphHash);
        chainFreeList(&chainToFree);
        }
    }

*revRet = isRev;
*orthoAgListRet = orthoList;
}
Esempio n. 2
0
void chainPair(struct seqPair *sp, FILE *f)
/* Make chains for all alignments in sp and dump them as text to f.
 * sp->blockList is handed to chainBlocks() by address. For every chain a
 * header line is written with the block count, score, and the query and
 * target extents (start of first block, end of last block), followed by one
 * line per block. Timing and node counts go to uglyf() for debugging. */
{
long startTime, dt;
struct chain *chainList = NULL, *chain;

uglyf("%s %d nodes\n", sp->name, slCount(sp->blockList));

/* Make up tree and time it for debugging. */
startTime = clock1000();
chainList = chainBlocks(&sp->blockList, gapCost);
dt = clock1000() - startTime;
uglyf("Made %d chains in %5.3f seconds\n", slCount(chainList), dt*0.001);

/* Dump chains to file. */
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    struct cBlock *first = chain->blockList;
    struct cBlock *last = slLastEl(first);
    struct cBlock *block;
    /* Target extent ends at last->tEnd; printing qEnd a second time here
     * was a copy/paste slip. */
    fprintf(f, "%s Chain %d, score %d, %d %d, %d %d:\n", 
	sp->name, slCount(chain->blockList), chain->score,
	first->qStart, last->qEnd, first->tStart, last->tEnd);
    for (block = chain->blockList; block != NULL; block = block->next)
        {
	fprintf(f, " %s q %d, t %d, score %d\n", sp->name,
		block->qStart, block->tStart, block->score);
	}
    fprintf(f, "\n");
    }
chainFreeList(&chainList);
uglyf("\n");
}
Esempio n. 3
0
static void ssFindBestBig(struct ffAli *ffList, bioSeq *qSeq, bioSeq *tSeq,
	enum ffStringency stringency, boolean isProt, struct trans3 *t3List,
	struct ffAli **retBestAli, int *retScore, struct ffAli **retLeftovers)
/* Turn every ffAli in ffList into a cBlock, let chainBlocks() string the
 * blocks together, and hand back the results:
 *   *retBestAli   - ffAli list built from the blocks of the first chain,
 *                   with overlaps between consecutive blocks trimmed at the
 *                   crossover point.
 *   *retScore     - score of that first chain.
 *   *retLeftovers - ffAli list built from the blocks of all other chains.
 * The cBlocks live in a local memory pool that is released on return. */
{
struct lm *lm = lmInit(0);
struct cBlock *blockList = NULL, *cur, *prev;
struct ffAli *ali, *tail;
struct chain *chains, *ch, *best;
DNA *tBase = tSeq->dna;
int lowT = BIGNUM, highT = -BIGNUM;

/* Build one block per ffAli for the chainer, tracking the target extent. */
ali = ffList;
while (ali != NULL)
    {
    int size = ali->nEnd - ali->nStart;

    lmAllocVar(lm, cur);
    cur->qStart = ali->nStart - qSeq->dna;
    cur->qEnd = cur->qStart + size;
    if (t3List == NULL)
        {
        cur->tStart = ali->hStart - tBase;
        cur->tEnd = cur->tStart + size;
        }
    else
        {
        trans3Offsets(t3List, ali->hStart, ali->hEnd, &cur->tStart, &cur->tEnd);
        }
    cur->data = ali;
    cur->score = bioScoreMatch(isProt, ali->nStart, ali->hStart, size);
    if (cur->tStart < lowT)
        lowT = cur->tStart;
    if (cur->tEnd > highT)
        highT = cur->tEnd;
    slAddHead(&blockList, cur);
    ali = ali->right;
    }

/* Shift target coordinates so the smallest tStart becomes 0. */
for (cur = blockList; cur != NULL; cur = cur->next)
    {
    cur->tStart -= lowT;
    cur->tEnd -= lowT;
    }
highT -= lowT;

/* The cost callbacks read these file-level settings. */
ssStringency = stringency;
ssIsProt = isProt;
chains = chainBlocks(qSeq->name, qSeq->size, '+', "tSeq", highT, &blockList,
	ssConnectCost, ssGapCost, NULL, NULL);

/* Resolve overlaps between neighbouring blocks of the best (first) chain by
 * trimming the underlying ffAlis on either side of the crossover point. */
best = chains;
prev = best->blockList;
cur = prev->next;
while (cur != NULL)
    {
    int overlap = findOverlap(prev, cur);
    if (overlap > 0)
        {
        struct ffAli *leftAli = prev->data;
        struct ffAli *rightAli = cur->data;
        int crossover = findCrossover(leftAli, rightAli, overlap, isProt);
        int trim = overlap - crossover;

        leftAli->nEnd -= trim;
        leftAli->hEnd -= trim;
        rightAli->nStart += crossover;
        rightAli->hStart += crossover;
        }
    prev = cur;
    cur = cur->next;
    }

/* Thread the best chain's ffAlis together through their left links. */
tail = NULL;
for (cur = best->blockList; cur != NULL; cur = cur->next)
    {
    ali = cur->data;
    ali->left = tail;
    tail = ali;
    }
*retBestAli = ffMakeRightLinks(tail);

/* Everything in the remaining chains goes to the leftover list. */
tail = NULL;
for (ch = chains->next; ch != NULL; ch = ch->next)
    {
    for (cur = ch->blockList; cur != NULL; cur = cur->next)
        {
        ali = cur->data;
        ali->left = tail;
        tail = ali;
        }
    }
*retLeftovers = ffMakeRightLinks(tail);

*retScore = best->score;

/* Blocks belong to the local pool, so detach them before freeing chains. */
for (ch = chains; ch != NULL; ch = ch->next)
    ch->blockList = NULL;
chainFreeList(&chains);
lmCleanup(&lm);
}
Esempio n. 4
0
/*
 * Usage: prog chain_file interval_text [features_gff_file]
 *
 * Reads all chains from argv[1]. For every "chr beg end" line of argv[2],
 * each chain is cut down to the target range beg..end with chainSubsetOnT();
 * every non-empty sub-chain contributes its query interval to the list of
 * homologous regions. Query coordinates of '-' strand sub-chains are flipped
 * using qSize. When a GFF file is given as argv[3], its features for the
 * chromosome named on the first interval line are loaded, and sub-chains for
 * which is_repeats() returns true are skipped. The collected regions are
 * sorted, reduced with remove_redundant_intervals() and printed.
 *
 * Fixes relative to the previous version:
 *  - the "file invalid" message reported argv[4] (NULL) instead of argv[3];
 *  - `repeats` was passed to free() uninitialised when no GFF file was given;
 *  - the "found a first hit" flag was never reset between interval lines, so
 *    later lines reused a stale (possibly freed) sub-chain;
 *  - the first hit of a line skipped the '-' strand coordinate flip that all
 *    later hits received;
 *  - `homologs` had room for one entry per chain only, but is filled across
 *    all interval lines; it now grows on demand;
 *  - the chromosome name is length-checked before being copied into chr.
 */
int main(int argc, char *argv[])
{
	FILE *f;
	struct chain *Chain;
	struct chain *SubChain, *chainToFree;
	struct chain *ch_p, *next_p;
	char buf[NUM_CHARS];
	char name[NUM_CHARS];	/* a token of buf can never be longer than buf */
	struct lineFile *lf;
	int i = 0;
	int b = 0, e = 0;
	struct exons_list *homologs;
	size_t capacity = 0;
	int num_chains = 0;
	int num_homologs = 0;
	struct exons_list *repeats = NULL;
	int num_repeats = 0;
	char chr[LEN_NAME];

	strcpy(chr, "");
	if( (argc != 3) && (argc != 4) ) {
		fatal("args: chain_file interval_text features_gff_file\n");
	}

	/* The first interval line supplies the chromosome name used to
	 * select features from the GFF file. */
	f = ckopen(argv[2], "r");
	if( f == NULL ) {
		fatalf("file %s invalid\n", argv[2]);
	}
	if( fgets(buf, NUM_CHARS, f) == NULL ) {
		fatalf("%s is empty\n", argv[2]);
	}
	if( sscanf(buf, "%s %d %d", name, &b, &e) != 3 ) {
		fatalf("format errors: chr beg end in %s", buf);
	}
	if( strlen(name) >= sizeof(chr) ) {
		fatalf("chromosome name too long in %s", buf);
	}
	strcpy(chr, name);
	fclose(f);

	if( argc == 4 ) {
		f = ckopen(argv[3], "r");
		if( f == NULL ) {
			fatalf("file %s invalid\n", argv[3]);
		}
		/* One slot per line of the GFF file. */
		while( fgets(buf, NUM_CHARS, f) ) {
			num_repeats++;
		}
		if( num_repeats > 0 ) {
			repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list));
			init_exons(repeats, 0, num_repeats-1);
			fseek(f, 0, SEEK_SET);
			assign_gff_exons_chr(f, repeats, num_repeats, chr);
			quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE);
		}
		fclose(f);
	}

	/* Read every chain and link them into one list, counting as we go. */
	lf = lineFileOpen(argv[1], true);
	Chain = chainRead(lf);
	ch_p = Chain;
	if( Chain != NULL ) {
		num_chains = 1;
	}
	while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) {
		ch_p->next = next_p;
		ch_p = next_p;
		num_chains++;
	}

	/* Start with one slot per chain (at least one, to avoid a zero-byte
	 * request) and grow below when more hits than that are found. */
	capacity = (num_chains > 0) ? (size_t) num_chains : 1;
	homologs = (struct exons_list *) ckalloc(capacity * sizeof(struct exons_list));

	i = 0;
	f = ckopen(argv[2], "r");
	if( f == NULL ) {
		fatalf("file %s invalid\n", argv[2]);
	}
	while( fgets(buf, NUM_CHARS, f) ) {
		if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) {
			fatalf("format errors: chr beg end in %s", buf);
		}

		for( ch_p = Chain; ch_p != NULL; ch_p = ch_p->next ) {
			SubChain = NULL;
			chainToFree = NULL;
			chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
			if( SubChain == NULL ) {
				continue;
			}

			if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) {
				if( (size_t) i >= capacity ) {
					/* The buffer is released with free() below, so it is
					 * treated as ordinary heap memory here as well. */
					struct exons_list *grown;
					size_t new_capacity;

					if( capacity > ((size_t) -1) / 2 / sizeof(struct exons_list) ) {
						fatal("too many homologous regions\n");
					}
					new_capacity = capacity * 2;
					grown = realloc(homologs, new_capacity * sizeof(struct exons_list));
					if( grown == NULL ) {
						fatal("out of memory while collecting homologous regions\n");
					}
					memset(grown + capacity, 0, (new_capacity - capacity) * sizeof(struct exons_list));
					homologs = grown;
					capacity = new_capacity;
				}

				if( SubChain->qStrand == '-' ) {
					homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart);
				}
				else {
					homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
				}
				homologs[i].dir = SubChain->qStrand;
				strcpy(homologs[i].chr, SubChain->qName);
				i++;
			}

			if( chainToFree != NULL ) {
				chainFree(&chainToFree);
			}
		}
	}
	fclose(f);

	num_homologs = i;
	selection_sort_exons(homologs, num_homologs);
	num_homologs = remove_redundant_intervals(homologs, num_homologs);
	print_exons_list(homologs, num_homologs);
	free(homologs);
	free(repeats);
	chainFreeList(&Chain);

	lineFileClose(&lf);

	return EXIT_SUCCESS;
}