void writeChainPart(struct dnaSeq *tChrom, struct nibTwoCache *qNtc, char *nibDir, struct chain *chain, int tStart, int tEnd, FILE *f, FILE *gapFile) /* write out axt's from subset of chain */ { struct dnaSeq *qSeq; boolean isRev = (chain->qStrand == '-'); struct chain *subChain, *chainToFree; int fullSeqSize; int qStart; chainSubsetOnT(chain, tStart, tEnd, &subChain, &chainToFree); if (subChain == NULL) errAbort("null subchain in chain ID %d\n", chain->id); /* Get query sequence fragment. */ nibTwoCacheSeqPart(qNtc, chain->qName, 1, 1, &fullSeqSize); qStart = (isRev ? fullSeqSize - subChain->qEnd : subChain->qStart); qSeq = nibTwoCacheSeqPart(qNtc, subChain->qName, qStart, subChain->qEnd - subChain->qStart, NULL); if (isRev) reverseComplement(qSeq->dna, qSeq->size); verbose(9, "fill chain id, subchain %d %s %d %d %c qOffset=%d\n", subChain->id, subChain->qName, tStart, tEnd, subChain->qStrand, qStart); writeAxtFromChain(subChain, qSeq, subChain->qStart, tChrom, 0, f, gapFile); chainFree(&chainToFree); freeDnaSeq(&qSeq); }
void writeChainPart(struct chain *chain, int tStart, int tEnd, FILE *f, FILE *gapFile)
/* Emit the piece of chain that falls within tStart..tEnd on the target.
 * The piece is written to f with chainWrite and, when gapFile is supplied,
 * also handed to gapWrite.  Any chain allocated for the subset is freed
 * before returning; the input chain is left alone. */
{
struct chain *piece = NULL;
struct chain *owned = NULL;

chainSubsetOnT(chain, tStart, tEnd, &piece, &owned);
assert(piece != NULL);

chainWrite(piece, f);
if (gapFile)
    gapWrite(piece, gapFile);

chainFree(&owned);
}
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize,
	struct hash *qChromHash, char *nibDir, struct chain *chain, boolean swap)
/* Build the axt list for the part of chain covered by fill.
 *   fill       - supplies the query name/range and the target range.
 *   tChrom     - target sequence passed to chainToAxt.
 *   tSize      - target size, used only when swapping.
 *   qChromHash - cache of nibInfo keyed by query name; a missing entry is
 *                created from nibDir/<qName>.nib and added to the hash.
 *   swap       - when true, axtSwap is applied to every axt produced.
 * Returns the axt list, or NULL when the chain has no subset in the
 * fill's target range. */
{
struct axt *result = NULL;
struct chain *part = NULL, *partOwner = NULL;
struct dnaSeq *querySeq;
int queryOffset;
struct nibInfo *info = hashFindVal(qChromHash, fill->qName);

/* Open the query nib on first use and remember it for later calls. */
if (info == NULL)
    {
    char nibPath[512];
    AllocVar(info);
    safef(nibPath, sizeof(nibPath), "%s/%s.nib", nibDir, fill->qName);
    info->fileName = cloneString(nibPath);
    nibOpenVerify(nibPath, &info->f, &info->size);
    hashAdd(qChromHash, fill->qName, info);
    }

/* Load the query fragment; a '-' strand chain needs it reverse-complemented
 * and its offset measured from the other end of the query. */
querySeq = nibLoadPartMasked(NIB_MASK_MIXED, info->fileName, fill->qStart, fill->qSize);
if (chain->qStrand == '-')
    {
    reverseComplement(querySeq->dna, querySeq->size);
    queryOffset = info->size - (fill->qStart + fill->qSize);
    }
else
    {
    queryOffset = fill->qStart;
    }

chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize, &part, &partOwner);
if (part != NULL)
    {
    result = chainToAxt(part, querySeq, queryOffset, tChrom, fill->tStart, 100, BIGNUM);
    if (swap)
        {
        struct axt *cur = result;
        while (cur != NULL)
            {
            axtSwap(cur, tSize, info->size);
            cur = cur->next;
            }
        }
    }

chainFree(&partOwner);
freeDnaSeq(&querySeq);
return result;
}
void chainSubSetForRegion(struct chain *chain, int blockStart, int blockEnd,
	struct chain **subChain, struct chain **toFree)
/* Subset chain on the target range blockStart..blockEnd via chainSubsetOnT,
 * then reject the result when the query span of the subset differs from the
 * requested target span by more than maxExonChange.  On rejection *toFree is
 * freed and *subChain is set to NULL. */
{
int querySpan;
int regionSpan;

chainSubsetOnT(chain, blockStart, blockEnd, subChain, toFree);
if (*subChain != NULL)
    {
    querySpan = abs((*subChain)->qEnd - (*subChain)->qStart);
    regionSpan = abs(blockEnd - blockStart);
    /* Too large a change in size is not believable. */
    if (abs(querySpan - regionSpan) > maxExonChange)
        {
        chainFree(toFree);
        *subChain = NULL;
        }
    }
}
static boolean mapThroughChain(struct chain *chain, double minRatio, int *pStart, int *pEnd, struct chain **retSubChain, struct chain **retChainToFree) /* Map interval from start to end from target to query side of chain. * Return FALSE if not possible, otherwise update *pStart, *pEnd. */ { struct chain *subChain = NULL; struct chain *freeChain = NULL; int s = *pStart, e = *pEnd; int oldSize = e - s; int newCover = 0; int ok = TRUE; chainSubsetOnT(chain, s, e, &subChain, &freeChain); if (subChain == NULL) { *retSubChain = NULL; *retChainToFree = NULL; return FALSE; } newCover = chainAliSize(subChain); if (newCover < oldSize * minRatio) ok = FALSE; else if (chain->qStrand == '+') { *pStart = subChain->qStart; *pEnd = subChain->qEnd; } else { *pStart = subChain->qSize - subChain->qEnd; *pEnd = subChain->qSize - subChain->qStart; } *retSubChain = subChain; *retChainToFree = freeChain; return ok; }
int main(int argc, char *argv[]) { FILE *f; struct chain *Chain; struct chain *SubChain, *chainToFree; struct chain *ch_p, *next_p; char buf[NUM_CHARS]; struct lineFile *lf; int i = 0; int b = 0, e = 0; bool is_null = true; struct exons_list *homologs; int num_chains = 0; int num_homologs = 0; struct exons_list *repeats; int num_repeats = 0; char chr[LEN_NAME]; strcpy(chr, ""); if( argc == 3 ) { if( (f = ckopen(argv[2], "r")) ) { if( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) { fatalf("format errors: chr beg end in %s", buf); } } else { fatalf("%s is empty\n", argv[2]); } } fclose(f); } else if( argc != 4 ) { fatal("args: chain_file interval_text features_gff_file\n"); } else { if( (f = ckopen(argv[2], "r")) ) { if( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) { fatalf("format errors: chr beg end in %s", buf); } } else { fatalf("%s is empty\n", argv[2]); } } fclose(f); if( (f = ckopen(argv[3], "r")) ) { while(fgets(buf, NUM_CHARS, f)) { i++; } num_repeats = i; repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list)); init_exons(repeats, 0, num_repeats-1); fseek(f, 0, SEEK_SET); assign_gff_exons_chr(f, repeats, num_repeats, chr); quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE); } else { fatalf("file %s invalid\n", argv[4]); } fclose(f); } lf = lineFileOpen(argv[1], true); Chain = chainRead(lf); ch_p = Chain; while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) { ch_p->next = next_p; ch_p = ch_p->next; i++; } // printf("Number of chains: %d\n", i); i = 0; ch_p = Chain; // while( (i < NUM_LOOPS) && (ch_p != NULL) ) { while( ch_p != NULL ) { // printf("chain %d: %d-%d\n", ch_p->id, ch_p->tStart, ch_p->tEnd); ch_p = ch_p->next; i++; } num_chains = i; homologs = (struct exons_list *) ckalloc(num_chains * sizeof(struct exons_list)); i = 0; f = ckopen(argv[2], "r"); while( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) { fatalf("format 
errors: chr beg end in %s", buf); } else { ch_p = Chain; if( ch_p != NULL ) { while( (ch_p != NULL) && (is_null == true) ) { chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree); if( SubChain != NULL ) is_null = false; ch_p = ch_p->next; } } if( is_null == false ) { if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) { homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd); homologs[i].dir = SubChain->qStrand; strcpy(homologs[i].chr, SubChain->qName); i++; } // printf("query: %s %d %d\n", SubChain->qName, SubChain->qStart, SubChain->qEnd); if( chainToFree != NULL ) { chainFree(&chainToFree); } while( ch_p != NULL ) { chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree); ch_p = ch_p->next; if( SubChain != NULL ) { if( (num_repeats == 0 ) || ( is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false )) { if( SubChain->qStrand == '-' ) { homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart); } else { homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd); } homologs[i].dir = SubChain->qStrand; strcpy(homologs[i].chr, SubChain->qName); i++; } // printf("query: %s %d %d\n", SubChain->qName, SubChain->qStart, SubChain->qEnd); if( chainToFree != NULL ) { chainFree(&chainToFree); } } } } } } num_homologs = i; selection_sort_exons(homologs, num_homologs); // print_exons_list(homologs, num_homologs); num_homologs = remove_redundant_intervals(homologs, num_homologs); print_exons_list(homologs, num_homologs); free(homologs); free(repeats); chainFreeList(&Chain); fclose(f); lineFileClose(&lf); return EXIT_SUCCESS; }
void checkExp(char *bedFileName, char *tNibDir, char *nibList) { struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL; char *row[PSEUDOGENELINK_NUM_COLS] ; struct pseudoGeneLink *ps; char *tmpName[512], cmd[512]; struct axt *axtList = NULL, *axt, *mAxt = NULL; struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL; struct nibInfo *qNib = NULL, *tNib = NULL; FILE *op; int ret; if (nibHash == NULL) nibHash = hashNew(0); while (lineFileNextRow(bf, row, ArraySize(row))) { struct misMatch *misMatchList = NULL; struct binKeeper *bk = NULL; struct binElement *el, *elist = NULL; struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ; struct misMatch *mf = NULL; ps = pseudoGeneLinkLoad(row); tmpName[0] = cloneString(ps->name); chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName)); verbose(2,"name %s %s:%d-%d\n", ps->name, ps->chrom, ps->chromStart,ps->chromEnd); /* get expressed retro from hash */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ; for (el = elist; el != NULL ; el = el->next) { rPsl = el->val; verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd); } /* find mrnas that overlap parent gene */ bk = hashFindVal(mrnaHash, ps->gChrom); elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ; for (el = elist; el != NULL ; el = el->next) { pPsl = el->val; verbose(2,"parent %s %s:%d %d,%d\n", pPsl->qName, pPsl->tName,pPsl->tStart, pPsl->match, pPsl->misMatch); } /* find self chain */ bk = hashFindVal(chainHash, ps->chrom); elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ; slSort(&elist, chainCmpScoreDesc); for (el = elist; el != NULL ; el = el->next) { struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2; int qs = chain->qStart; int qe = chain->qEnd; int id = chain->id; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } if (!sameString(chain->qName , ps->gChrom) || 
!positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd)) { verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd, ps->gChrom,ps->gStart,ps->gEnd); continue; } verbose(2,"chain id %d %4.0f",chain->id, chain->score); chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, &subChain, &retChainToFree); if (subChain != NULL) chain = subChain; chainSubsetOnQ(chain, ps->gStart, ps->gEnd, &subChain, &retChainToFree2); if (subChain != NULL) chain = subChain; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } verbose(2," %s:%d-%d %s:%d-%d ", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd); if (subChain != NULL) verbose(2,"subChain %s:%d-%d %s:%d-%d\n", subChain->qName, subChain->qStart, subChain->qEnd, subChain->tName,subChain->tStart,subChain->tEnd); qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName); tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName); tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+'); qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand); axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList)); for (axt = axtList; axt != NULL ; axt = axt->next) { addMisMatch(&misMatchList, axt, chain->qSize); } verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id); chainFree(&retChainToFree); chainFree(&retChainToFree2); break; } /* create axt of each expressed retroGene to parent gene */ /* get alignment for each mrna overlapping retroGene */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ; { char queryName[512]; char axtName[512]; char pslName[512]; safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom); safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom); safef(pslName, 
sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom); op = fopen(pslName,"w"); for (el = elist ; el != NULL ; el = el->next) { psl = el->val; pslOutput(psl, op, '\t','\n'); qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0); if (qSeq != NULL) slAddHead(&seqList, qSeq); else errAbort("seq %s not found \n", psl->qName); } fclose(op); faWriteAll(queryName, seqList); safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName); ret = system(cmd); if (ret != 0) errAbort("ret is %d %s\n",ret,cmd); verbose(2, "ret is %d %s\n",ret,cmd); af = lineFileOpen(axtName, TRUE); while ((axt = axtRead(af)) != NULL) slAddHead(&mAxt, axt); lineFileClose(&af); } slReverse(&mAxt); /* for each parent/retro pair, count bases matching retro and parent better */ for (el = elist; el != NULL ; el = el->next) { int i, scoreRetro=0, scoreParent=0, scoreNeither=0; struct dyString *parentMatch = newDyString(16*1024); struct dyString *retroMatch = newDyString(16*1024); mPsl = el->val; if (mAxt != NULL) { verbose(2,"mrna %s %s:%d %d,%d axt %s\n", mPsl->qName, mPsl->tName,mPsl->tStart, mPsl->match, mPsl->misMatch, mAxt->qName); assert(sameString(mPsl->qName, mAxt->qName)); for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++) { int j = mAxt->tStart - mPsl->tStart; verbose(5, "listLen = %d\n",slCount(&misMatchList)); if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL) { if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match retro[%d] %d %c == %c parent %c %d\n", i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], mf->parentBase, mf->parentLoc); dyStringPrintf(retroMatch, "%d,", mf->retroLoc); scoreRetro++; } else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match parent[%d] %d %c == %c retro %c %d\n", i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], mf->retroBase, mf->retroLoc); dyStringPrintf(parentMatch, "%d,", mf->parentLoc); scoreParent++; } else { verbose (3,"match neither[%d] %d %c != %c retro %c %d\n", 
i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], mf->retroBase, mf->retroLoc); scoreNeither++; } } } verbose(2,"final score %s parent %d retro %d neither %d\n", mPsl->qName, scoreParent, scoreRetro, scoreNeither); fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n", ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string); mAxt = mAxt->next; } dyStringFree(&parentMatch); dyStringFree(&retroMatch); } } }