void writeChainPart(struct dnaSeq *tChrom,
	struct nibTwoCache *qNtc, char *nibDir,
	struct chain *chain, int tStart, int tEnd, FILE *f, FILE *gapFile)
/* Write out axt's from the subset of chain that overlaps tStart..tEnd on
 * the target.
 *   tChrom   - target sequence handed through to writeAxtFromChain
 *   qNtc     - cache used to load the query sequence
 *   nibDir   - not referenced in this function
 *   chain    - chain to clip
 *   f        - handed through to writeAxtFromChain
 *   gapFile  - handed through to writeAxtFromChain
 * Aborts via errAbort if the clipped chain comes back NULL. */
{
struct dnaSeq *qSeq;
struct dnaSeq *sizeProbe;
boolean isRev = (chain->qStrand == '-');
struct chain *subChain, *chainToFree;
int fullSeqSize;
int qStart;

chainSubsetOnT(chain, tStart, tEnd, &subChain, &chainToFree);
if (subChain == NULL)
    errAbort("null subchain in chain ID %d\n", chain->id);

/* Load a single base only to learn the full size of the query sequence.
 * The returned sequence was previously discarded and leaked on every call;
 * release it right away.
 * NOTE(review): assumes nibTwoCacheSeqPart returns a caller-owned dnaSeq,
 * as the second call below already treats it (it is freed with freeDnaSeq). */
sizeProbe = nibTwoCacheSeqPart(qNtc, chain->qName, 1, 1, &fullSeqSize);
freeDnaSeq(&sizeProbe);

/* Get query sequence fragment.  For a minus-strand chain the fragment start
 * is computed from the end of the full sequence, and the loaded bases are
 * then reverse-complemented. */
qStart = (isRev ? fullSeqSize - subChain->qEnd : subChain->qStart);
qSeq = nibTwoCacheSeqPart(qNtc, subChain->qName, qStart, 
                                subChain->qEnd - subChain->qStart, NULL);
if (isRev)
    reverseComplement(qSeq->dna, qSeq->size);

verbose(9, "fill chain id, subchain %d %s %d %d %c qOffset=%d\n", 
                subChain->id, subChain->qName,
                tStart, tEnd, subChain->qStrand, qStart);
writeAxtFromChain(subChain, qSeq, subChain->qStart, tChrom, 0, f, gapFile);
chainFree(&chainToFree);
freeDnaSeq(&qSeq);
}
void writeChainPart(struct chain *chain, int tStart, int tEnd, FILE *f,
	FILE *gapFile)
/* Clip chain to tStart..tEnd on the target side and write the clipped
 * chain to f with chainWrite.  When gapFile is non-NULL the clipped chain
 * is also passed to gapWrite.  Asserts that the clipped chain is non-NULL. */
{
struct chain *subChain;
struct chain *scratch;	/* handed to chainFree once writing is done */

chainSubsetOnT(chain, tStart, tEnd, &subChain, &scratch);
assert(subChain != NULL);

chainWrite(subChain, f);
if (gapFile)
    {
    gapWrite(subChain, gapFile);
    }

chainFree(&scratch);
}
Example #3
0
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize,
	struct hash *qChromHash, char *nibDir,
	struct chain *chain, boolean swap)
/* Build an axt list for the part of chain covered by fill and return it
 * (NULL when the clipped chain is NULL).
 *   qChromHash - looked up by fill->qName for a nibInfo; on a miss a new
 *                entry is opened from nibDir/<qName>.nib and added.
 *   swap       - when true, axtSwap is applied to every returned axt
 *                using tSize and the query nib size. */
{
boolean qIsMinus = (chain->qStrand == '-');
struct nibInfo *qNib = hashFindVal(qChromHash, fill->qName);
struct dnaSeq *qFrag;
struct chain *clipped, *clippedOwner;
struct axt *result = NULL, *cur;
int qBase;

/* Make sure the query nib is known, opening it on first use. */
if (qNib == NULL)
    {
    char nibPath[512];
    AllocVar(qNib);
    safef(nibPath, sizeof(nibPath), "%s/%s.nib", nibDir, fill->qName);
    qNib->fileName = cloneString(nibPath);
    nibOpenVerify(nibPath, &qNib->f, &qNib->size);
    hashAdd(qChromHash, fill->qName, qNib);
    }

/* Load the query fragment; on the minus strand flip it and measure the
 * offset from the far end of the nib. */
qFrag = nibLoadPartMasked(NIB_MASK_MIXED, qNib->fileName,
	fill->qStart, fill->qSize);
if (qIsMinus)
    reverseComplement(qFrag->dna, qFrag->size);
qBase = qIsMinus ? qNib->size - (fill->qStart + fill->qSize) : fill->qStart;

/* Clip the chain to the fill's target range and convert. */
chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize,
	&clipped, &clippedOwner);
if (clipped != NULL)
    {
    result = chainToAxt(clipped, qFrag, qBase, tChrom, fill->tStart, 100, BIGNUM);
    if (swap)
	{
	cur = result;
	while (cur != NULL)
	    {
	    axtSwap(cur, tSize, qNib->size);
	    cur = cur->next;
	    }
	}
    }

chainFree(&clippedOwner);
freeDnaSeq(&qFrag);
return result;
}
Example #4
0
void chainSubSetForRegion(struct chain *chain, int blockStart, int blockEnd,
			  struct chain **subChain, struct chain **toFree)
/* Clip chain to blockStart..blockEnd with chainSubsetOnT.  If the query
 * span of the result differs from the requested target span by more than
 * maxExonChange, free the result and set *subChain to NULL. */
{
int qSpan, tSpan;

chainSubsetOnT(chain, blockStart, blockEnd, subChain, toFree);
if(*subChain != NULL)
    {
    qSpan = abs((*subChain)->qEnd - (*subChain)->qStart);
    tSpan = abs(blockEnd - blockStart);

    /* Reject clips whose size changed too much to be trusted. */
    if(abs(qSpan - tSpan) > maxExonChange)
	{
	chainFree(toFree);
	*subChain = NULL;
	}
    }
}
Example #5
0
static boolean mapThroughChain(struct chain *chain, double minRatio, 
	int *pStart, int *pEnd, struct chain **retSubChain,
	struct chain **retChainToFree)
/* Map the interval *pStart..*pEnd from the target side of chain to the
 * query side.
 * Returns FALSE when the clipped chain is NULL (both ret pointers are then
 * set to NULL) or when chainAliSize of the clip is below
 * (end - start) * minRatio (the ret pointers are still filled in).
 * Otherwise *pStart/*pEnd are overwritten with query coordinates, flipped
 * against qSize unless chain->qStrand is '+'. */
{
    struct chain *clip = NULL;
    struct chain *clipOwner = NULL;
    int tBegin = *pStart;
    int tFinish = *pEnd;
    int wanted = tFinish - tBegin;
    int aligned;
    int tooSparse;

    chainSubsetOnT(chain, tBegin, tFinish, &clip, &clipOwner);
    if (clip == NULL)
    {
	*retSubChain = NULL;
	*retChainToFree = NULL;
	return FALSE;
    }

    aligned = chainAliSize(clip);
    tooSparse = (aligned < wanted * minRatio);
    if (!tooSparse)
    {
	if (chain->qStrand == '+')
	{
	    *pStart = clip->qStart;
	    *pEnd = clip->qEnd;
	}
	else
	{
	    *pStart = clip->qSize - clip->qEnd;
	    *pEnd = clip->qSize - clip->qStart;
	}
    }

    /* The caller receives the clip (and its owner) even when coverage fails. */
    *retSubChain = clip;
    *retChainToFree = clipOwner;
    return tooSparse ? FALSE : TRUE;
}
Example #6
0
int main(int argc, char *argv[])
/* Usage: prog chain_file interval_text [features_gff_file]
 *
 * Reads every chain from argv[1].  For each "chr beg end" line of argv[2],
 * clips every chain to beg..end on the target with chainSubsetOnT and
 * records the query-side interval of each non-NULL clip, skipping clips
 * that is_repeats() reports against the optional GFF features in argv[3].
 * The recorded intervals are sorted, passed through
 * remove_redundant_intervals and printed with print_exons_list.
 *
 * Fixes relative to the earlier version:
 *   - repeats starts as NULL, so the final free() is valid without a GFF file;
 *   - the "invalid file" message names argv[3] instead of argv[4];
 *   - is_null is reset for every interval line, so a stale (already freed)
 *     SubChain is never reused;
 *   - homologs is sized for num_chains entries per interval line instead of
 *     num_chains in total;
 *   - files are only closed when they were actually opened. */
{
	FILE *f;
	struct chain *Chain;
	struct chain *SubChain = NULL, *chainToFree = NULL;
	struct chain *ch_p, *next_p;
	char buf[NUM_CHARS];
	struct lineFile *lf;
	int i = 0;
	int b = 0, e = 0;
	bool is_null = true;
	struct exons_list *homologs;
	int num_chains = 0;
	int num_intervals = 0;
	int num_homologs = 0;
	struct exons_list *repeats = NULL;
	int num_repeats = 0;
	char chr[LEN_NAME];
	size_t capacity;

	strcpy(chr, "");
	if( (argc == 3) || (argc == 4) ) {
		/* The first interval line supplies the chromosome name used to
		 * filter the GFF features below. */
		/* NOTE(review): "%s" is unbounded; a name longer than LEN_NAME-1
		 * would overflow chr. */
		if( (f = ckopen(argv[2], "r")) ) {
			if( fgets(buf, NUM_CHARS, f) ) {
				if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) {
					fatalf("format errors: chr beg end in %s", buf);
				}
			}
			else {
				fatalf("%s is empty\n", argv[2]);
			}
			fclose(f);
		}
		else {
			fatalf("file %s invalid\n", argv[2]);
		}

		if( argc == 4 ) {
			if( (f = ckopen(argv[3], "r")) ) {
				/* One slot per line of the GFF file. */
				while(fgets(buf, NUM_CHARS, f)) {
					i++;
				}
				num_repeats = i;
				repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list));
				init_exons(repeats, 0, num_repeats-1);	
				fseek(f, 0, SEEK_SET);
				assign_gff_exons_chr(f, repeats, num_repeats, chr);
				quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE);
				fclose(f);
			}
			else {
				fatalf("file %s invalid\n", argv[3]);
			}
		}
	}
	else {
		fatal("args: chain_file interval_text features_gff_file\n");
	}

	/* Read all chains into a singly linked list in file order. */
	lf = lineFileOpen(argv[1], true);
	Chain = chainRead(lf);
	ch_p = Chain;
	while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) {
		ch_p->next = next_p;
		ch_p = next_p;
	}

	num_chains = 0;
	for( ch_p = Chain; ch_p != NULL; ch_p = ch_p->next ) {
		num_chains++;
	}

	/* Count interval lines first: each line can add at most one entry per
	 * chain, so num_chains * num_intervals bounds the number of homologs. */
	f = ckopen(argv[2], "r");
	while( fgets(buf, NUM_CHARS, f) ) {
		num_intervals++;
	}
	fclose(f);
	capacity = (size_t) num_chains * (size_t) (num_intervals > 0 ? num_intervals : 1);
	homologs = (struct exons_list *) ckalloc(capacity * sizeof(struct exons_list));

	i = 0;
	f = ckopen(argv[2], "r");
	while( fgets(buf, NUM_CHARS, f) ) { 	
		if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) {
			fatalf("format errors: chr beg end in %s", buf);
		}
		else {
			/* Start every interval from a clean state. */
			is_null = true;
			SubChain = NULL;
			chainToFree = NULL;

			/* Phase 1: advance to the first chain that overlaps b..e. */
			ch_p = Chain;
			while( (ch_p != NULL) && (is_null == true) ) {
				chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
				if( SubChain != NULL ) is_null = false;
				ch_p = ch_p->next;
			}

			if( is_null == false ) {
				/* NOTE(review): this first hit records qStart/qEnd as-is,
				 * while later hits flip them for '-' strand chains;
				 * confirm whether the difference is intended. */
				/* NOTE(review): strcpy into homologs[].chr is unbounded. */
				if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) {
					homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
					homologs[i].dir = SubChain->qStrand;
					strcpy(homologs[i].chr, SubChain->qName);
					i++;
				}
				if( chainToFree != NULL ) {
					chainFree(&chainToFree);
				}

				/* Phase 2: every remaining chain. */
				while( ch_p != NULL ) {
					chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
					ch_p = ch_p->next;
					if( SubChain != NULL ) {
						if( (num_repeats == 0 ) || ( is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false )) {
							if( SubChain->qStrand == '-' ) {
								homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart);
							}
							else {
								homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
							}
							homologs[i].dir = SubChain->qStrand;
							strcpy(homologs[i].chr, SubChain->qName);
							i++;
						}
						if( chainToFree != NULL ) {
							chainFree(&chainToFree);
						}
					}
				}
			}
		}
	}

	num_homologs = i;
	selection_sort_exons(homologs, num_homologs);
	num_homologs = remove_redundant_intervals(homologs, num_homologs);
	print_exons_list(homologs, num_homologs);
	free(homologs);
	free(repeats);	/* NULL when no GFF file was given; free(NULL) is a no-op */
	chainFreeList(&Chain);

	fclose(f);
	lineFileClose(&lf);

	return EXIT_SUCCESS;
}
Example #7
0
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
/* For every pseudoGeneLink row of bedFileName:
 *   1. log the mrnas overlapping the retro locus and the parent locus;
 *   2. take the first chain at the retro locus (sorted with
 *      chainCmpScoreDesc) whose query side hits the parent gene, clip it,
 *      convert it to axt and collect its mismatches with addMisMatch;
 *   3. write the mrnas overlapping the retro locus to temporary psl/fa
 *      files under /tmp, run pslPretty on them and read back the axt output;
 *   4. for each such mrna count the mismatch positions where the mrna base
 *      equals the retro base, the parent base, or neither, and print one
 *      line to outFile.
 * Reads nibHash, mrnaHash, chainHash, twoBitFile, maxGap and outFile, none
 * of which are declared inside this function.
 *
 * NOTE(review): seqList and mAxt are declared outside the row loop and are
 * never reset, so they carry over from one row to the next; confirm this is
 * intended.
 * NOTE(review): ps, the cloneString copy in tmpName[0], axtList, tSeq and
 * the qSeq loaded for the chain are not freed here.
 * NOTE(review): the results of hashFindVal are passed to binKeeperFind*
 * without a NULL check. */
{
struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL;
char *row[PSEUDOGENELINK_NUM_COLS] ;
struct pseudoGeneLink *ps;
char *tmpName[512], cmd[512];
struct axt *axtList = NULL, *axt, *mAxt = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL;
struct nibInfo *qNib = NULL, *tNib = NULL;
FILE *op;
int ret;

if (nibHash == NULL)
    nibHash = hashNew(0);
while (lineFileNextRow(bf, row, ArraySize(row)))
    {
    struct misMatch *misMatchList = NULL;
    struct binKeeper *bk = NULL;
    struct binElement *el, *elist = NULL;
    struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ;
    struct misMatch *mf = NULL;
    ps = pseudoGeneLinkLoad(row);
    tmpName[0] = cloneString(ps->name);
    /* The last argument is the number of slots in tmpName; sizeof(tmpName)
     * was a byte count and overstated the capacity. */
    chopByChar(tmpName[0], '.', tmpName, ArraySize(tmpName));
    verbose(2,"name %s %s:%d-%d\n",
            ps->name, ps->chrom, ps->chromStart,ps->chromEnd);
    /* get expressed retro from hash */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        rPsl = el->val;
        verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd);
        }
    /* find mrnas that overlap parent gene */
    bk = hashFindVal(mrnaHash, ps->gChrom);
    elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        pPsl = el->val;
        verbose(2,"parent %s %s:%d %d,%d\n",
                pPsl->qName, pPsl->tName,pPsl->tStart,
                pPsl->match, pPsl->misMatch);
        }
    /* find self chain */
    bk = hashFindVal(chainHash, ps->chrom);
    elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ;
    slSort(&elist, chainCmpScoreDesc);
    for (el = elist; el != NULL ; el = el->next)
        {
        struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2;
        int qs = chain->qStart;
        int qe = chain->qEnd;
        int id = chain->id;
        /* Express the query range on the forward strand for the overlap test. */
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        if (!sameString(chain->qName , ps->gChrom) || 
                !positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd))
            {
            verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd,
                ps->gChrom,ps->gStart,ps->gEnd);
            continue;
            }
        verbose(2,"chain id %d %4.0f",chain->id, chain->score);
        /* Clip on the target with a 7-base inset at each end, then clip the
         * result on the query to the parent gene range; each step keeps the
         * previous chain when the clip comes back NULL. */
        chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, 
            &subChain,  &retChainToFree);
        if (subChain != NULL)
            chain = subChain;
        chainSubsetOnQ(chain, ps->gStart, ps->gEnd, 
            &subChain,  &retChainToFree2);
        if (subChain != NULL)
            chain = subChain;
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        verbose(2," %s:%d-%d %s:%d-%d ", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd);
        if (subChain != NULL)
            verbose(2,"subChain %s:%d-%d %s:%d-%d\n",
                    subChain->qName, subChain->qStart, subChain->qEnd, 
                    subChain->tName,subChain->tStart,subChain->tEnd);

	/* Both sides are loaded from tNibDir. */
	qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName);
	tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName);
	tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+');
	qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand);
	axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart,
	    maxGap, BIGNUM);
        verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList));
        for (axt = axtList; axt != NULL ; axt = axt->next)
            {
            addMisMatch(&misMatchList, axt, chain->qSize);
            }
        verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id);
        chainFree(&retChainToFree);
        chainFree(&retChainToFree2);
        /* Only the first chain that passes the parent test is used. */
        break;
        }
    /* create axt of each expressed retroGene to parent gene */
        /* get alignment for each mrna overlapping retroGene */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ;
    {
    char queryName[512];
    char axtName[512];
    char pslName[512];
    /* NOTE(review): the temporary file names depend only on ps->chrom, so
     * concurrent runs on the same chromosome would share them. */
    safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom);
    safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom);
    safef(pslName, sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom);
    op = fopen(pslName,"w");
    if (op == NULL)
        errAbort("can't open %s for writing\n", pslName);
    for (el = elist ; el != NULL ; el = el->next)
        {
        psl = el->val;
        pslOutput(psl, op, '\t','\n');
        qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);

        if (qSeq != NULL)
            slAddHead(&seqList, qSeq);
        else
            errAbort("seq %s not found \n", psl->qName);
        }
    fclose(op);
    faWriteAll(queryName, seqList);
    /* NOTE(review): the command line is assembled from ps->chrom and nibList
     * and run through the shell without quoting. */
    safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName);
    ret = system(cmd);
    if (ret != 0)
        errAbort("ret is %d %s\n",ret,cmd);
    verbose(2, "ret is %d %s\n",ret,cmd);
    af = lineFileOpen(axtName, TRUE);
    while ((axt = axtRead(af)) != NULL)
        slAddHead(&mAxt, axt);
    lineFileClose(&af);
    }
    slReverse(&mAxt);
    /* for each parent/retro pair, count bases matching retro and parent better */
    for (el = elist; el != NULL ; el = el->next)
        {
        int i, scoreRetro=0, scoreParent=0, scoreNeither=0;
        struct dyString *parentMatch = newDyString(16*1024);
        struct dyString *retroMatch = newDyString(16*1024);
        mPsl = el->val;

        /* mAxt is walked in step with elist: one axt is consumed per mrna. */
        if (mAxt != NULL)
            {
            verbose(2,"mrna %s %s:%d %d,%d axt %s\n",
                    mPsl->qName, mPsl->tName,mPsl->tStart,
                    mPsl->match, mPsl->misMatch, 
                    mAxt->qName);
            assert(sameString(mPsl->qName, mAxt->qName));
            for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++)
                {
                /* NOTE(review): j+i indexes qSym/tSym without a range check. */
                int j = mAxt->tStart - mPsl->tStart;
                /* Count the list itself, not the address of its head pointer. */
                verbose(5, "listLen = %d\n",slCount(misMatchList));
                if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL)
                    {
                    if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match retro[%d] %d %c == %c parent %c %d\n",
                                i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], 
                                mf->parentBase, mf->parentLoc);
                        dyStringPrintf(retroMatch, "%d,", mf->retroLoc);
                        scoreRetro++;
                        }
                    else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match parent[%d] %d %c == %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        dyStringPrintf(parentMatch, "%d,", mf->parentLoc);
                        scoreParent++;
                        }
                    else
                        {
                        verbose (3,"match neither[%d] %d %c != %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        scoreNeither++;
                        }
                    }
                }
            verbose(2,"final score %s parent %d retro %d  neither %d\n",
                    mPsl->qName, scoreParent, scoreRetro, scoreNeither);
            fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n",
                    ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, 
                    mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, 
                    scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string);
            mAxt = mAxt->next;
            }
        dyStringFree(&parentMatch);
        dyStringFree(&retroMatch);
        }
    }
}