void loadOrthoAgxList(struct txGraph *ag, struct indexedChain *ixc, struct hash *orthoGraphHash,
                      boolean *revRet, struct txGraph **orthoAgListRet)
/** Return the txGraph records in the orthologous position on the other
    genome as defined by ag and chain.  *revRet is set to TRUE when the
    overlapping piece of the chain lies on the query '-' strand, and
    *orthoAgListRet receives whatever agxForCoordinates() finds (NULL when
    ixc is NULL or no part of the chain overlaps ag). */
{
    struct txGraph *foundList = NULL;
    boolean isReversed = FALSE;

    if (ixc != NULL) {
        struct chain *overlap = NULL;
        struct chain *owned = NULL;

        /* First find the orthologous splicing graph. */
        indexedChainSubsetOnT(ixc, ag->tStart, ag->tEnd, &overlap, &owned);
        if (overlap != NULL) {
            int qStart = 0;
            int qEnd = 0;
            char queryStrand;

            qChainRangePlusStrand(overlap, &qStart, &qEnd);
            isReversed = (overlap->qStrand == '-') ? TRUE : FALSE;

            /* A chain on the '-' strand flips the strand we look up. */
            if (!isReversed)
                queryStrand = ag->strand[0];
            else if (ag->strand[0] == '+')
                queryStrand = '-';
            else
                queryStrand = '+';

            foundList = agxForCoordinates(overlap->qName, qStart, qEnd, queryStrand,
                                          orthoGraphHash);
            chainFreeList(&owned);
        }
    }

    *revRet = isReversed;
    *orthoAgListRet = foundList;
}
void chainPair(struct seqPair *sp, FILE *f) /* Make chains for all alignments in sp. */ { long startTime, dt; struct axt *axt; struct cBlock *blockList, *block; struct chain *chainList = NULL, *chain; uglyf("%s %d nodes\n", sp->name, slCount(sp->blockList)); /* Make up tree and time it for debugging. */ startTime = clock1000(); chainList = chainBlocks(&sp->blockList, gapCost); dt = clock1000() - startTime; uglyf("Made %d chains in %5.3f seconds\n", slCount(chainList), dt*0.001); /* Dump chains to file. */ for (chain = chainList; chain != NULL; chain = chain->next) { struct cBlock *first = chain->blockList; struct cBlock *last = slLastEl(first); struct cBlock *block; fprintf(f, "%s Chain %d, score %d, %d %d, %d %d:\n", sp->name, slCount(chain->blockList), chain->score, first->qStart, last->qEnd, first->tStart, last->qEnd); for (block = chain->blockList; block != NULL; block = block->next) { fprintf(f, " %s q %d, t %d, score %d\n", sp->name, block->qStart, block->tStart, block->score); } fprintf(f, "\n"); } chainFreeList(&chainList); uglyf("\n"); }
static void ssFindBestBig(struct ffAli *ffList, bioSeq *qSeq, bioSeq *tSeq,
        enum ffStringency stringency, boolean isProt, struct trans3 *t3List,
        struct ffAli **retBestAli, int *retScore, struct ffAli **retLeftovers)
/* Go set up things to call chainBlocks to find best way to string together
 * blocks in alignment.
 *   ffList       - alignment blocks, walked through their ->right links.
 *   qSeq, tSeq   - query and target sequences; block coordinates are taken
 *                  as offsets from qSeq->dna and tSeq->dna.
 *   t3List       - when non-NULL, target offsets come from trans3Offsets()
 *                  instead of plain pointer arithmetic.
 *   retBestAli   - receives the blocks of the first chain returned.
 *   retScore     - receives that chain's score.
 *   retLeftovers - receives the blocks of all remaining chains.
 * If chainBlocks() yields no chain at all (for instance when ffList is
 * empty) the outputs are set to NULL, 0 and NULL respectively. */
{
    struct cBlock *boxList = NULL, *box, *prevBox;
    struct ffAli *ff, *farRight = NULL;
    struct lm *lm = lmInit(0);
    int boxSize;
    DNA *firstH = tSeq->dna;
    struct chain *chainList, *chain, *bestChain;
    int tMin = BIGNUM, tMax = -BIGNUM;

    /* Make up box list for chainer.  Boxes live in the local memory pool lm
     * and point back at their ffAli through box->data. */
    for (ff = ffList; ff != NULL; ff = ff->right) {
        lmAllocVar(lm, box);
        boxSize = ff->nEnd - ff->nStart;
        box->qStart = ff->nStart - qSeq->dna;
        box->qEnd = box->qStart + boxSize;
        if (t3List) {
            trans3Offsets(t3List, ff->hStart, ff->hEnd, &box->tStart, &box->tEnd);
        } else {
            box->tStart = ff->hStart - firstH;
            box->tEnd = box->tStart + boxSize;
        }
        box->data = ff;
        box->score = bioScoreMatch(isProt, ff->nStart, ff->hStart, boxSize);
        if (tMin > box->tStart)
            tMin = box->tStart;
        if (tMax < box->tEnd)
            tMax = box->tEnd;
        slAddHead(&boxList, box);
    }

    /* Adjust boxes so that tMin is always 0. */
    for (box = boxList; box != NULL; box = box->next) {
        box->tStart -= tMin;
        box->tEnd -= tMin;
    }
    tMax -= tMin;

    /* The cost callbacks read these file-level settings. */
    ssStringency = stringency;
    ssIsProt = isProt;
    chainList = chainBlocks(qSeq->name, qSeq->size, '+', "tSeq", tMax, &boxList,
                            ssConnectCost, ssGapCost, NULL, NULL);

    /* Nothing to chain: report empty results rather than dereferencing a
     * NULL chain below. */
    if (chainList == NULL) {
        *retBestAli = NULL;
        *retLeftovers = NULL;
        *retScore = 0;
        lmCleanup(&lm);
        return;
    }

    /* Fixup crossovers on best (first) chain.  Where two consecutive boxes
     * overlap, the left block is trimmed by (overlap - crossover) at its end
     * and the right block by crossover at its start. */
    bestChain = chainList;
    prevBox = bestChain->blockList;
    for (box = prevBox->next; box != NULL; box = box->next) {
        int overlap = findOverlap(prevBox, box);
        if (overlap > 0) {
            struct ffAli *left = prevBox->data;
            struct ffAli *right = box->data;
            int crossover = findCrossover(left, right, overlap, isProt);
            int remain = overlap - crossover;
            left->nEnd -= remain;
            left->hEnd -= remain;
            right->nStart += crossover;
            right->hStart += crossover;
        }
        prevBox = box;
    }

    /* Copy stuff from first chain to bestAli. */
    farRight = NULL;
    for (box = chainList->blockList; box != NULL; box = box->next) {
        ff = box->data;
        ff->left = farRight;
        farRight = ff;
    }
    *retBestAli = ffMakeRightLinks(farRight);

    /* Copy stuff from other chains to leftovers. */
    farRight = NULL;
    for (chain = chainList->next; chain != NULL; chain = chain->next) {
        for (box = chain->blockList; box != NULL; box = box->next) {
            ff = box->data;
            ff->left = farRight;
            farRight = ff;
        }
    }
    *retLeftovers = ffMakeRightLinks(farRight);
    *retScore = bestChain->score;

    /* Detach the block lists before freeing the chains. */
    for (chain = chainList; chain != NULL; chain = chain->next)
        chain->blockList = NULL;        /* Don't want to free this, it's local. */
    chainFreeList(&chainList);
    lmCleanup(&lm);
}
int main(int argc, char *argv[]) { FILE *f; struct chain *Chain; struct chain *SubChain, *chainToFree; struct chain *ch_p, *next_p; char buf[NUM_CHARS]; struct lineFile *lf; int i = 0; int b = 0, e = 0; bool is_null = true; struct exons_list *homologs; int num_chains = 0; int num_homologs = 0; struct exons_list *repeats; int num_repeats = 0; char chr[LEN_NAME]; strcpy(chr, ""); if( argc == 3 ) { if( (f = ckopen(argv[2], "r")) ) { if( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) { fatalf("format errors: chr beg end in %s", buf); } } else { fatalf("%s is empty\n", argv[2]); } } fclose(f); } else if( argc != 4 ) { fatal("args: chain_file interval_text features_gff_file\n"); } else { if( (f = ckopen(argv[2], "r")) ) { if( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) { fatalf("format errors: chr beg end in %s", buf); } } else { fatalf("%s is empty\n", argv[2]); } } fclose(f); if( (f = ckopen(argv[3], "r")) ) { while(fgets(buf, NUM_CHARS, f)) { i++; } num_repeats = i; repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list)); init_exons(repeats, 0, num_repeats-1); fseek(f, 0, SEEK_SET); assign_gff_exons_chr(f, repeats, num_repeats, chr); quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE); } else { fatalf("file %s invalid\n", argv[4]); } fclose(f); } lf = lineFileOpen(argv[1], true); Chain = chainRead(lf); ch_p = Chain; while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) { ch_p->next = next_p; ch_p = ch_p->next; i++; } // printf("Number of chains: %d\n", i); i = 0; ch_p = Chain; // while( (i < NUM_LOOPS) && (ch_p != NULL) ) { while( ch_p != NULL ) { // printf("chain %d: %d-%d\n", ch_p->id, ch_p->tStart, ch_p->tEnd); ch_p = ch_p->next; i++; } num_chains = i; homologs = (struct exons_list *) ckalloc(num_chains * sizeof(struct exons_list)); i = 0; f = ckopen(argv[2], "r"); while( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) { fatalf("format 
errors: chr beg end in %s", buf); } else { ch_p = Chain; if( ch_p != NULL ) { while( (ch_p != NULL) && (is_null == true) ) { chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree); if( SubChain != NULL ) is_null = false; ch_p = ch_p->next; } } if( is_null == false ) { if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) { homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd); homologs[i].dir = SubChain->qStrand; strcpy(homologs[i].chr, SubChain->qName); i++; } // printf("query: %s %d %d\n", SubChain->qName, SubChain->qStart, SubChain->qEnd); if( chainToFree != NULL ) { chainFree(&chainToFree); } while( ch_p != NULL ) { chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree); ch_p = ch_p->next; if( SubChain != NULL ) { if( (num_repeats == 0 ) || ( is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false )) { if( SubChain->qStrand == '-' ) { homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart); } else { homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd); } homologs[i].dir = SubChain->qStrand; strcpy(homologs[i].chr, SubChain->qName); i++; } // printf("query: %s %d %d\n", SubChain->qName, SubChain->qStart, SubChain->qEnd); if( chainToFree != NULL ) { chainFree(&chainToFree); } } } } } } num_homologs = i; selection_sort_exons(homologs, num_homologs); // print_exons_list(homologs, num_homologs); num_homologs = remove_redundant_intervals(homologs, num_homologs); print_exons_list(homologs, num_homologs); free(homologs); free(repeats); chainFreeList(&Chain); fclose(f); lineFileClose(&lf); return EXIT_SUCCESS; }