static void tranalign_AddGaps(AjPSeq newseq, const AjPSeq nseq, const AjPSeq pseq, ajlong npos) { AjPStr newstr = NULL; ajuint ppos = 0; newstr = ajStrNew(); for(; ppos<ajSeqGetLen(pseq); ppos++) if(ajSeqGetSeqC(pseq)[ppos] == '-') ajStrAppendC(&newstr, "---"); else { ajStrAppendSubS(&newstr, ajSeqGetSeqS(nseq), npos, npos+2); npos+=3; } ajDebug("aligned seq=%S\n", newstr); ajSeqAssignSeqS(newseq, newstr); ajStrDel(&newstr); return; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq; AjPFile outf; AjPCod codon; AjPStr substr; ajint beg; ajint end; ajint ccnt; embInit("cusp", argc, argv); seqall = ajAcdGetSeqall("sequence"); outf = ajAcdGetOutfile("outfile"); ccnt = 0; substr = ajStrNew(); codon = ajCodNewCodenum(0); ajCodSetNameS(codon, ajFileGetPrintnameS(outf)); while(ajSeqallNext(seqall, &seq)) { beg = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); ajStrAssignSubS(&substr,ajSeqGetSeqS(seq),beg-1,end-1); ajCodSetTripletsS(codon,substr,&ccnt); } ajCodCalcUsage(codon,ccnt); ajCodSetDescC(codon, "CUSP codon usage file"); ajCodWrite(codon, outf); ajFileClose(&outf); ajStrDel(&substr); ajCodDel(&codon); ajSeqallDel(&seqall); ajSeqDel(&seq); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq; AjPFile outf; AjPCod codon; AjPStr substr; AjBool sum; ajint ccnt; ajint beg; ajint end; float Nc; double td; embInit("chips", argc, argv); seqall = ajAcdGetSeqall("seqall"); outf = ajAcdGetOutfile("outfile"); sum = ajAcdGetBoolean("sum"); codon = ajCodNewCodenum(0); ccnt = 0; substr = ajStrNew(); while(ajSeqallNext(seqall, &seq)) { beg = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); ajStrAssignSubS(&substr,ajSeqGetSeqS(seq),beg-1,end-1); ajStrFmtUpper(&substr); ajCodSetTripletsS(codon,substr,&ccnt); if(!sum) { ajCodCalcUsage(codon,ccnt); td = ajCodCalcNc(codon); Nc = (float) td; ajFmtPrintF(outf,"%-20s Nc = %.3f\n",ajSeqGetNameC(seq),Nc); ajCodClearData(codon); } } if(sum) { ajCodCalcUsage(codon,ccnt); td = ajCodCalcNc(codon); Nc = (float) td; ajFmtPrintF(outf,"# CHIPS codon usage statistics\n\n"); ajFmtPrintF(outf,"Nc = %.3f\n",Nc); } ajFileClose(&outf); ajCodDel(&codon); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); embExit(); return 0; }
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq, const AjPPatternSeq pat, AjBool reverse) { const void *tidy; ajuint hits; ajuint i; AjPPatComp pattern; EmbPMatMatch m = NULL; AjPFeature sf = NULL; AjPSeq revseq = NULL; AjPList list = ajListNew(); AjPStr seqstr = ajStrNew(); AjPStr seqname = ajStrNew(); AjPStr tmp = ajStrNew(); ajint adj; ajint begin; AjBool isreversed; ajint seqlen; seqlen = ajSeqGetLen(seq); if(!seqlen) return; isreversed = ajSeqIsReversedTrue(seq); if(isreversed) seqlen += ajSeqGetOffset(seq); begin = ajSeqGetBeginTrue(seq); adj = ajSeqGetEndTrue(seq); if(!ajStrGetLen(featMotifProt)) ajStrAssignC(&featMotifProt, "SO:0001067"); if(!ajStrGetLen(featMotifNuc)) ajStrAssignC(&featMotifNuc, "SO:0000714"); ajStrAssignS(&seqname,ajSeqGetNameS(seq)); pattern = ajPatternSeqGetCompiled(pat); if (reverse) { revseq = ajSeqNewSeq(seq); ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), begin-1,adj-1); ajSeqstrReverse(&seqstr); } else ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), begin-1,adj-1); ajStrFmtUpper(&seqstr); /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n" "'%S'\n", seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq), ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq), seqstr);*/ ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n", pattern->pattern, pat->Protein, reverse); embPatFuzzSearchII(pattern,begin,seqname,seqstr,list, ajPatternSeqGetMismatch(pat),&hits,&tidy); ajDebug ("embPatternSeqSearch: found %d hits\n",hits); if(!reverse) ajListReverse(list); for(i=0;i<hits;++i) { ajListPop(list,(void **)&m); if (reverse) sf = ajFeatNew(ftable, NULL, featMotifNuc, adj - m->start - m->len + begin + 1, adj - m->start + begin, 0.0, '-', 0); else { if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable)) sf = ajFeatNewProt(ftable, NULL, featMotifProt, m->start, m->start + m->len - 1, 0.0); else sf = ajFeatNew(ftable, NULL, featMotifNuc, m->start, m->start + m->len - 1, 0.0, '.', 0); } if(isreversed) ajFeatReverse(sf, seqlen); /* ajUser("isrev: %B reverse: %B begin: %d adj: %d " "start: %d len: %d seqlen: %d %d..%d '%c'\n", isreversed, reverse, begin, adj, m->start, m->len, seqlen, sf->Start, sf->End, sf->Strand); */ ajFeatSetScore(sf, (float) (m->len - m->mm)); ajFmtPrintS(&tmp, "*pat %S: %S", ajPatternSeqGetName(pat), ajPatternSeqGetPattern(pat)); ajFeatTagAdd(sf,NULL,tmp); if(m->mm) { ajFmtPrintS(&tmp, "*mismatch %d", m->mm); ajFeatTagAdd(sf, NULL, tmp); } embMatMatchDel(&m); } ajStrDel(&seqname); ajStrDel(&seqstr); ajStrDel(&tmp); ajListFree(&list); if (reverse) ajSeqDel(&revseq); return; }
void getorf_FindORFs(const AjPSeq seq, ajint len, const AjPTrn trnTable, ajuint minsize, ajuint maxsize, AjPSeqout seqout, AjBool sense, AjBool circular, ajint find, ajint *orf_no, AjBool methionine, ajint around, ORFrec *record) { AjBool ORF[3]; /* true if found an ORF */ AjBool LASTORF[3]; /* true if hit the end of an ORF past the end on the genome in this frame */ AjBool GOTSTOP[3]; /* true if found a STOP in a circular genome's frame when find = P_STOP2STOP or N_STOP2STOP */ ajint start[3]; /* possible starting position of the three frames */ ajint pos; ajint codon; char aa; ajint frame; AjPStr newstr[3]; /* strings of the three frames of ORF sequences that we are growing */ AjPSeq pep = NULL; ajint i; ajint seqlen; const char *chrseq; seqlen = ajSeqGetLen(seq); chrseq = ajSeqGetSeqC(seq); /* initialise the ORF sequences */ newstr[0] = NULL; newstr[1] = NULL; newstr[2] = NULL; /* ** initialise flags for found the last ORF past the end of a circular ** genome */ LASTORF[0] = ajFalse; LASTORF[1] = ajFalse; LASTORF[2] = ajFalse; /* initialise flags for found at least one STOP codon in a frame */ GOTSTOP[0] = ajFalse; GOTSTOP[1] = ajFalse; GOTSTOP[2] = ajFalse; if (circular || find == P_START2STOP || find == N_START2STOP || find == AROUND_START) { ORF[0] = ajFalse; ORF[1] = ajFalse; ORF[2] = ajFalse; } else { /* ** assume already in a ORF so we get ORFs at the start of the ** sequence */ ORF[0] = ajTrue; ORF[1] = ajTrue; ORF[2] = ajTrue; start[0] = 0; start[1] = 1; start[2] = 2; } for (pos=0; pos<seqlen-2; pos++) { codon = ajTrnStartStopC(trnTable, &chrseq[pos], &aa); frame = pos % 3; ajDebug("len=%d, Pos=%d, Frame=%d start/stop=%d, aa=%c", len, pos, frame, codon, aa); /* don't want to find extra ORFs when already been round circ */ if (LASTORF[frame]) continue; if (find == P_STOP2STOP || find == N_STOP2STOP || find == AROUND_INIT_STOP || find == AROUND_END_STOP) { /* look for stop codon to begin reporting ORF */ /* note that there was at least one STOP in a circular genome */ if (codon == STOP) { GOTSTOP[frame] = ajTrue; } /* write details if a STOP is hit or the end of the sequence */ if (codon == STOP || pos >= seqlen-5) { /* ** End of the sequence? If so, append any ** last codon to the sequence - otherwise, ignore the STOP ** codon */ if (codon != STOP) getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); /* Already have a sequence to write out? */ if (ORF[frame]) { if (ajStrGetLen(newstr[frame]) >= minsize && ajStrGetLen(newstr[frame]) <= maxsize) { /* create a new sequence */ if (codon == STOP) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos-1, newstr[frame], seqout, around); else getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos+2, newstr[frame], seqout, around); } ajStrSetClear(&newstr[frame]); } /* ** if its a circular genome and the STOP codon hits past ** the end of the genome in all frames, then break */ if (circular && pos >= len) { ORF[frame] = ajFalse; /* past the end of the genome */ LASTORF[frame] = ajTrue; /* finished getting ORFs */ if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break; } else { /* ** hit a STOP, therefore a potential ORF to write ** out next time, even if the genome is circular */ ORF[frame] = ajTrue; start[frame] = pos+3; /* next start of the ORF */ } } else if (ORF[frame]) /* append sequence to newstr if in an ORF */ getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); } else { /* Look for start: P_START2STOP N_START2STOP AROUND_START */ if (codon == START && !ORF[frame]) { /* not in a ORF already and found a START */ if (pos < len) { /* ** reset the newstr to zero length to enable ** storing the ORF for this */ ajStrSetClear(&newstr[frame]); ORF[frame] = ajTrue; /* now in an ORF */ start[frame] = pos; /* start of the ORF for this frame */ if (methionine) getorf_AppORF(find, &newstr[frame], chrseq, pos, 'M'); else getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); } } else if (codon == STOP) { /* hit a STOP */ /* Already have a sequence to write out? */ if (ORF[frame]) { ORF[frame] = ajFalse; /* not in an ORF */ if (ajStrGetLen(newstr[frame]) >= minsize && ajStrGetLen(newstr[frame]) <= maxsize) { /* create a new sequence */ getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos-1, newstr[frame], seqout, around); } } /* ** if a circular genome and hit the STOP past ** the end of the genome in all frames, then break */ if (circular && pos >= len) { LASTORF[frame] = ajTrue; /* finished getting ORFs */ if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break; } ajStrSetClear(&newstr[frame]); } else if (pos >= seqlen-5) { /* hit the end of the sequence without a stop */ /* Already have a sequence to write out? */ if (ORF[frame]) { ORF[frame] = ajFalse; /* not in an ORF */ /* ** End of the sequence? If so, append any ** last codon to the sequence - otherwise, ignore the ** STOP codon */ if (pos >= seqlen-5 && pos < seqlen-2) getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); if (ajStrGetLen(newstr[frame]) >= minsize && ajStrGetLen(newstr[frame]) <= maxsize) { /* create a new sequence */ getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos+2, newstr[frame], seqout, around); } } /* ** if a circular genome and hit the STOP past ** the end of the genome in all frames, then break */ if (circular && pos >= len) { LASTORF[frame] = ajTrue; /* finished getting ORFs */ if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break; } ajStrSetClear(&newstr[frame]); } else if (ORF[frame]) getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); } } /* ** Currently miss reporting a STOP-to-STOP ORF that is ** the full length of a circular genome when there are no STOP codons in ** that frame */ if ((find == P_STOP2STOP || find == N_STOP2STOP) && circular) { if (!GOTSTOP[0]) { /* translate frame 1 into pep */ pep = ajTrnSeqOrig(trnTable, seq, 1); if (ajSeqGetLen(pep) >= minsize && ajSeqGetLen(pep) <= maxsize) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, 0, seqlen-1, ajSeqGetSeqS(pep), seqout, around); ajSeqDel(&pep); } if (!GOTSTOP[1]) { /* translate frame 2 into pep */ pep = ajTrnSeqOrig(trnTable, seq, 2); if (ajSeqGetLen(pep) >= minsize && ajSeqGetLen(pep) <= maxsize) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, 1, seqlen-1, ajSeqGetSeqS(pep), seqout, around); ajSeqDel(&pep); } if (!GOTSTOP[2]) { /* translate frame 3 into pep */ pep = ajTrnSeqOrig(trnTable, seq, 3); if (ajSeqGetLen(pep) >= minsize && ajSeqGetLen(pep) >= maxsize) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, 2, seqlen-1, ajSeqGetSeqS(pep), seqout, around); ajSeqDel(&pep); } } for (i=0;i<3;++i) ajStrDel(&newstr[i]); return; }
int main(int argc, char **argv) { AjPSeqset seq1; AjPSeqset seq2; AjPFile list; ajint n1; ajint n2; ajint *lengths1; ajint *lengths2; ajuint *order1; ajuint *order2; ajint *hits1; ajint *hits2; ajint curr1; ajint curr2; ajint tmp1; ajint tmp2 = 0; ajint i; AjPStr operator; ajint OperatorCode=0; embInit("listor", argc, argv); seq1 = ajAcdGetSeqset("firstsequences"); seq2 = ajAcdGetSeqset("secondsequences"); list = ajAcdGetOutfile("outfile"); operator = ajAcdGetListSingle("operator"); /* get the operator value */ switch(ajStrGetCharFirst(operator)) { case 'O': OperatorCode = L_OR; break; case 'A': OperatorCode = L_AND; break; case 'X': OperatorCode = L_XOR; break; case 'N': OperatorCode = L_NOT; break; default: ajFatal("Invalid operator type: %S", operator); embExitBad(); } /* get the order of seqset 1 by length */ n1 = ajSeqsetGetSize(seq1); /* lengths of seq1 entries */ lengths1 = AJCALLOC0(n1, sizeof(ajint)); /* seq1 entries which match seq2 */ hits1 = AJCALLOC0(n1, sizeof(ajint)); /* seq1 entries in length order */ order1 = AJCALLOC0(n1, sizeof(ajint)); for(i=0; i<n1; i++) { lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i)); order1[i] = i; hits1[i] = -1; } ajSortIntIncI(lengths1, order1, n1); /* get the order of seqset 2 by length */ n2 = ajSeqsetGetSize(seq2); lengths2 = AJCALLOC0(n2, sizeof(ajint)); hits2 = AJCALLOC0(n2, sizeof(ajint)); order2 = AJCALLOC0(n2, sizeof(ajint)); for(i=0; i<n2; i++) { lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i)); order2[i] = i; hits2[i] = -1; } ajSortIntIncI(lengths2, order2, n2); /* ** go down the two sequence sets, by size order, looking for identical **lengths */ curr1 = 0; curr2 = 0; while(curr1 < n1 && curr2 < n2) { if(lengths1[order1[curr1]] < lengths2[order2[curr2]]) /* seq1 is shorter - increment curr1 index */ curr1++; else if(lengths1[order1[curr1]] > lengths2[order2[curr2]]) /* seq2 is shorter - increment curr2 index */ curr2++; else { /* identical lengths - check all seq1/seq2 entries of this len */ for(tmp1=curr1; tmp1<n1 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++) for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] == lengths2[order2[curr2]]; tmp2++) /* check to see if the sequences are identical */ if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1, order1[tmp1])), ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2, order2[tmp2])))) { hits1[order1[tmp1]] = order2[tmp2]; hits2[order2[tmp2]] = order1[tmp1]; } curr1 = tmp1; curr2 = tmp2; } } /* output the required entries to the list file */ listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2); AJFREE(lengths1); AJFREE(lengths2); AJFREE(order1); AJFREE(order2); AJFREE(hits1); AJFREE(hits2); ajFileClose(&list); ajStrDel(&operator); ajSeqsetDel(&seq1); ajSeqsetDel(&seq2); embExit(); return 0; }
int main(int argc, char **argv) { AjPList list = NULL; AjPSeq seq; AjPSeq seq2; AjPStr aa0str = 0; AjPStr aa1str = 0; const char *s1; const char *s2; char *strret = NULL; ajuint i; ajuint j; ajuint k; ajint l; ajint abovethresh; ajint total; ajint starti = 0; ajint startj = 0; ajint windowsize; float thresh; AjPGraph graph = NULL; AjPGraph xygraph = NULL; float flen1; float flen2; ajuint len1; ajuint len2; AjPTime ajtime = NULL; time_t tim; AjBool boxit=AJTRUE; /* Different ticks as they need to be different for x and y due to length of string being important on x */ ajuint acceptableticksx[]= { 1,10,50,100,500,1000,1500,10000, 500000,1000000,5000000 }; ajuint acceptableticks[]= { 1,10,50,100,200,500,1000,2000,5000,10000,15000, 500000,1000000,5000000 }; ajint numbofticks = 10; float xmargin; float ymargin; float ticklen; float tickgap; float onefifth; float k2; float max; char ptr[10]; AjPMatrix matrix = NULL; ajint** sub; AjPSeqCvt cvt; AjPStr subt = NULL; ajint b1; ajint b2; ajint e1; ajint e2; AjPStr se1; AjPStr se2; ajint ithresh; AjBool stretch; PPoint ppt = NULL; float xa[1]; float ya[1]; AjPGraphdata gdata=NULL; AjPStr tit = NULL; AjIList iter = NULL; float x1 = 0.; float x2 = 0.; float y1 = 0.; float y2 = 0.; ajuint tui; se1 = ajStrNew(); se2 = ajStrNew(); embInit("dotmatcher", argc, argv); seq = ajAcdGetSeq("asequence"); seq2 = ajAcdGetSeq("bsequence"); stretch = ajAcdGetToggle("stretch"); graph = ajAcdGetGraph("graph"); xygraph = ajAcdGetGraphxy("xygraph"); windowsize = ajAcdGetInt("windowsize"); ithresh = ajAcdGetInt("threshold"); matrix = ajAcdGetMatrix("matrixfile"); sub = ajMatrixGetMatrix(matrix); cvt = ajMatrixGetCvt(matrix); thresh = (float)ithresh; ajtime = ajTimeNew(); tim = time(0); ajTimeSetLocal(ajtime, tim); b1 = ajSeqGetBegin(seq); b2 = ajSeqGetBegin(seq2); e1 = ajSeqGetEnd(seq); e2 = ajSeqGetEnd(seq2); len1 = ajSeqGetLen(seq); len2 = ajSeqGetLen(seq2); tui = ajSeqGetLen(seq); flen1 = (float) tui; tui = ajSeqGetLen(seq2); flen2 = (float) tui; ajStrAssignSubC(&se1,ajSeqGetSeqC(seq),b1-1,e1-1); ajStrAssignSubC(&se2,ajSeqGetSeqC(seq2),b2-1,e2-1); ajSeqAssignSeqS(seq,se1); ajSeqAssignSeqS(seq2,se2); s1 = ajStrGetPtr(ajSeqGetSeqS(seq)); s2 = ajStrGetPtr(ajSeqGetSeqS(seq2)); aa0str = ajStrNewRes(1+len1); /* length plus trailing blank */ aa1str = ajStrNewRes(1+len2); list = ajListNew(); for(i=0;i<len1;i++) ajStrAppendK(&aa0str,(char)ajSeqcvtGetCodeK(cvt, *s1++)); for(i=0;i<len2;i++) ajStrAppendK(&aa1str,(char)ajSeqcvtGetCodeK(cvt, *s2++)); max = (float)len1; if(len2 > max) max = (float) len2; xmargin = ymargin = max *(float)0.15; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; subt = ajStrNewC((strret= ajFmtString("(windowsize = %d, threshold = %3.2f %D)", windowsize,thresh,ajtime))); if(!stretch) { if( ajStrGetLen(ajGraphGetSubtitleS(graph)) <=1) ajGraphSetSubtitleS(graph,subt); ajGraphOpenWin(graph, (float)0.0-ymargin,(max*(float)1.35)+ymargin, (float)0.0-xmargin,(float)max+xmargin); ajGraphicsDrawposTextAtmid(flen1*(float)0.5, (float)0.0-(xmargin/(float)2.0), ajGraphGetXlabelC(graph)); ajGraphicsDrawposTextAtlineJustify((float)0.0-(xmargin*(float)0.75), flen2*(float)0.5, (float)0.0-(xmargin*(float)0.75),flen1, ajGraphGetYlabelC(graph),0.5); ajGraphicsSetCharscale(0.5); } s1= ajStrGetPtr(aa0str); s2 = ajStrGetPtr(aa1str); for(j=0; (ajint)j < (ajint)len2-windowsize;j++) { i =0; total = 0; abovethresh =0; k = j; for(l=0;l<windowsize;l++) total = total + sub[(ajint)s1[i++]][(ajint)s2[k++]]; if(total >= thresh) { abovethresh=1; starti = i-windowsize; startj = k-windowsize; } while(i < len1 && k < len2) { total = total - sub[(ajint)s1[i-windowsize]] [(ajint)s2[k-windowsize]]; total = total + sub[(ajint)s1[i]][(ajint)s2[k]]; if(abovethresh) { if(total < thresh) { abovethresh = 0; /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)i-1,(float)k-1,stretch); } } else if(total >= thresh) { starti = i-windowsize; startj = k-windowsize; abovethresh= 1; } i++; k++; } if(abovethresh) /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)i-1,(float)k-1, stretch); } for(i=0; (ajint)i < (ajint)len1-windowsize;i++) { j = 0; total = 0; abovethresh =0; k = i; for(l=0;l<windowsize;l++) total = total + sub[(ajint)s1[k++]][(ajint)s2[j++]]; if(total >= thresh) { abovethresh=1; starti = k-windowsize; startj = j-windowsize; } while(k < len1 && j < len2) { total = total - sub[(ajint)s1[k-windowsize]] [(ajint)s2[j-windowsize]]; total = total + sub[(ajint)s1[k]][(ajint)s2[j]]; if(abovethresh) { if(total < thresh) { abovethresh = 0; /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)k-1,(float)j-1,stretch); } } else if(total >= thresh) { starti = k-windowsize; startj = j-windowsize; abovethresh= 1; } j++; k++; } if(abovethresh) /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)k-1,(float)j-1, stretch); } if(boxit && !stretch) { ajGraphicsDrawposRect(0.0,0.0,flen1, flen2); i=0; while(acceptableticksx[i]*numbofticks < len1) i++; if(i<=13) tickgap = (float)acceptableticksx[i]; else tickgap = (float)acceptableticksx[10]; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; if(len2/len1 > 10 ) { /* if a lot smaller then just label start and end */ ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0,(float)0.0-ticklen); sprintf(ptr,"%d",b1-1); ajGraphicsDrawposTextAtmid((float)0.0,(float)0.0-(onefifth),ptr); ajGraphicsDrawposLine(flen1,(float)0.0, flen1,(float)0.0-ticklen); sprintf(ptr,"%d",len1+b1-1); ajGraphicsDrawposTextAtmid(flen1,(float)0.0-(onefifth),ptr); } else for(k2=0.0;k2<len1;k2+=tickgap) { ajGraphicsDrawposLine(k2,(float)0.0,k2,(float)0.0-ticklen); sprintf(ptr,"%d",(ajint)k2+b1-1); ajGraphicsDrawposTextAtmid(k2,(float)0.0-(onefifth),ptr); } i = 0; while(acceptableticks[i]*numbofticks < len2) i++; tickgap = (float)acceptableticks[i]; ticklen = ymargin*(float)0.01; onefifth = ymargin*(float)0.02; if(len1/len2 > 10 ) { /* if a lot smaller then just label start and end */ ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0-ticklen,(float)0.0); sprintf(ptr,"%d",b2-1); ajGraphicsDrawposTextAtend((float)0.0-(onefifth),(float)0.0,ptr); ajGraphicsDrawposLine((float)0.0,flen2,(float)0.0-ticklen, flen2); sprintf(ptr,"%d",len2+b2-1); ajGraphicsDrawposTextAtend((float)0.0-(onefifth),flen2,ptr); } else for(k2=0.0;k2<len2;k2+=tickgap) { ajGraphicsDrawposLine((float)0.0,k2,(float)0.0-ticklen,k2); sprintf(ptr,"%d",(ajint)k2+b2-1); ajGraphicsDrawposTextAtend((float)0.0-(onefifth),k2,ptr); } } if(!stretch) ajGraphicsClose(); else /* the xy graph for -stretch */ { tit = ajStrNew(); ajFmtPrintS(&tit,"%S",ajGraphGetTitleS(xygraph)); gdata = ajGraphdataNewI(1); xa[0] = (float)b1; ya[0] = (float)b2; ajGraphSetTitleC(xygraph,ajStrGetPtr(tit)); ajGraphSetXlabelC(xygraph,ajSeqGetNameC(seq)); ajGraphSetYlabelC(xygraph,ajSeqGetNameC(seq2)); ajGraphdataSetTypeC(gdata,"2D Plot Float"); ajGraphdataSetTitleS(gdata,subt); ajGraphdataSetMinmax(gdata,(float)b1,(float)e1,(float)b2, (float)e2); ajGraphdataSetTruescale(gdata,(float)b1,(float)e1,(float)b2, (float)e2); ajGraphxySetXstartF(xygraph,(float)b1); ajGraphxySetXendF(xygraph,(float)e1); ajGraphxySetYstartF(xygraph,(float)b2); ajGraphxySetYendF(xygraph,(float)e2); ajGraphxySetXrangeII(xygraph,b1,e1); ajGraphxySetYrangeII(xygraph,b2,e2); if(list) { iter = ajListIterNewread(list); while((ppt = ajListIterGet(iter))) { x1 = ppt->x1+b1-1; y1 = ppt->y1+b2-1; x2 = ppt->x2+b1-1; y2 = ppt->y2+b2-1; ajGraphAddLine(xygraph,x1,y1,x2,y2,0); AJFREE(ppt); } ajListIterDel(&iter); } ajGraphdataAddXY(gdata,xa,ya); ajGraphDataReplace(xygraph,gdata); ajGraphxyDisplay(xygraph,ajFalse); ajGraphicsClose(); ajStrDel(&tit); } ajListFree(&list); ajSeqDel(&seq); ajSeqDel(&seq2); ajGraphxyDel(&graph); ajGraphxyDel(&xygraph); ajMatrixDel(&matrix); ajTimeDel(&ajtime); /* deallocate memory */ ajStrDel(&aa0str); ajStrDel(&aa1str); ajStrDel(&se1); ajStrDel(&se2); ajStrDel(&subt); AJFREE(strret); /* created withing ajFmtString */ embExit(); return 0; }
int main(int argc, char *argv[]) { embInitPV("ggcsi", argc, argv, "GEMBASSY", "1.0.1"); struct soap soap; struct ns1__gcsiInputParams params; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; ajint window = 0; AjBool at = 0; AjBool purine = 0; AjBool keto = 0; AjBool pval = 0; AjPStr version = NULL; AjBool accid = ajFalse; AjPStr tmp = NULL; AjPStr parse = NULL; AjPStr gcsi = NULL; AjPStr sa = NULL; AjPStr dist = NULL; AjPStr z = NULL; AjPStr p = NULL; AjPStrTok handle = NULL; char *in0; char *result; AjPFile outf = NULL; seqall = ajAcdGetSeqall("sequence"); window = ajAcdGetInt("window"); at = ajAcdGetBoolean("at"); purine = ajAcdGetBoolean("purine"); keto = ajAcdGetBoolean("keto"); pval = ajAcdGetBoolean("pval"); version = ajAcdGetSelectSingle("gcsi"); accid = ajAcdGetBoolean("accid"); outf = ajAcdGetOutfile("outfile"); params.window = window; params.at = 0; params.purine = 0; params.keto = 0; params.p = 0; ajStrToInt(version, &(params.version)); if(at) params.at = 1; if(purine) params.purine = 1; if(keto) params.keto = 1; if(pval) params.p = 1; while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); ajStrAssignS(&seqid, ajSeqGetAccS(seq)); in0 = ajCharNewS(inseq); if (soap_call_ns1__gcsi( &soap, NULL, NULL, in0, ¶ms, &result ) == SOAP_OK) { tmp = ajStrNew(); parse = ajStrNew(); gcsi = ajStrNew(); sa = ajStrNew(); dist = ajStrNew(); z = ajStrNew(); p = ajStrNew(); ajStrAssignC(&tmp, result); ajStrExchangeCC(&tmp, "<", "\n"); ajStrExchangeCC(&tmp, ">", "\n"); handle = ajStrTokenNewC(tmp, "\n"); while (ajStrTokenNextParse(&handle, &parse)) { if (ajStrIsFloat(parse)) { if(!ajStrGetLen(gcsi)) ajStrAssignS(&gcsi, parse); else if(!ajStrGetLen(sa)) ajStrAssignS(&sa, parse); else if(!ajStrGetLen(dist)) ajStrAssignS(&dist, parse); else if(!ajStrGetLen(z)) ajStrAssignS(&z, parse); else if(!ajStrGetLen(p)) ajStrAssignS(&p, parse); } } tmp = ajFmtStr("Sequence: %S GCSI: %S SA: %S DIST: %S", seqid, gcsi, sa, dist); if(pval) tmp = ajFmtStr("%S Z: %S P: %S", tmp, z, p); ajFmtPrintF(outf, "%S\n", tmp); ajStrDel(&tmp); ajStrDel(&parse); ajStrDel(&gcsi); ajStrDel(&sa); ajStrDel(&dist); ajStrDel(&z); ajStrDel(&p); } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&seqid); embExit(); return 0; }
static void restover_printHits(const AjPSeq seq, const AjPStr seqcmp, AjPFile outf, AjPList l, const AjPStr name, ajint hits, ajint begin, ajint end, ajint mincut, ajint maxcut, AjBool plasmid, ajint sitelen, AjBool limit, const AjPTable table, AjBool alpha, AjBool frags, AjBool html) { EmbPMatMatch m = NULL; AjPStr ps = NULL; ajint *fa = NULL; ajint *fx = NULL; ajint fc = 0; ajint fn = 0; ajint fb = 0; ajint last = 0; AjPStr overhead = NULL; const AjPStr value = NULL; ajint i; ajint c = 0; ajint hang1; ajint hang2; ps = ajStrNew(); fn = 0; if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Restrict of %S from %d to %d\n",name,begin,end); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"#\n"); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Minimum cuts per enzyme: %d\n",mincut); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Maximum cuts per enzyme: %d\n",maxcut); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Minimum length of recognition site: %d\n", sitelen); if(html) ajFmtPrintF(outf,"<BR>"); hits = embPatRestrictRestrict(l,hits,!limit,alpha); if(frags) { fa = AJALLOC(hits*2*sizeof(ajint)); fx = AJALLOC(hits*2*sizeof(ajint)); } ajFmtPrintF(outf,"# Number of hits with any overlap: %d\n",hits); if(html) ajFmtPrintF(outf,"<BR>"); if(html) ajFmtPrintF(outf,"</p><table border cellpadding=4 " "bgcolor=\"#FFFFF0\">\n"); if(html) ajFmtPrintF(outf, "<th>Base Number</th><th>Enzyme</th><th>Site</th>" "<th>5'</th><th>3'</th><th>[5'</th><th>3']</th>\n"); else ajFmtPrintF(outf,"# Base Number\tEnzyme\t\tSite\t\t5'\t3'\t" "[5'\t3']\n"); for(i=0;i<hits;++i) { ajListPop(l,(void **)&m); ajDebug("hit %d start:%d cut1:%d cut2:%d\n", i, m->start, m->cut1, m->cut2); hang1 = (ajint)m->cut1 - (ajint)m->start; hang2 = (ajint)m->cut2 - (ajint)m->start; if(!plasmid && (hang1>100 || hang2>100)) { embMatMatchDel(&m); continue; } if(limit) { value=ajTableFetchS(table,m->cod); if(value) ajStrAssignS(&m->cod,value); } if(m->cut2 >= m->cut1) ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut1, m->cut2-1); else { ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut2, m->cut1-1); ajStrReverse(&overhead); } ajDebug("overhead:%S seqcmp:%S\n", overhead, seqcmp); /* Print out only those who have the same overhang. */ if(ajStrMatchCaseS(overhead, seqcmp)) { if(html) { ajFmtPrintF(outf, "<tr><td>%-d</td><td>%-16s</td><td>%-16s" "</td><td>%d</td><td>%d</td></tr>\n", m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat), m->cut1,m->cut2); } else ajFmtPrintF(outf,"\t%-d\t%-16s%-16s%d\t%d\t\n", m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat), m->cut1,m->cut2); } if(frags) fa[fn++] = m->cut1; if(m->cut3 || m->cut4) { if(m->cut4 >= m->cut3) ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut3, m->cut4-1); else { ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut4, m->cut3-1); ajStrReverse(&overhead); } if(ajStrMatchCaseS(overhead, seqcmp)) { if(html) ajFmtPrintF(outf, "<tr><td>%-d</td><td>%-16s</td><td>%-16s" "</td><td></td><td></td><td>%d</td><td>%d" "</td></tr>\n", m->start,ajStrGetPtr(m->cod), ajStrGetPtr(m->pat), m->cut1,m->cut2); else ajFmtPrintF(outf,"\t%-d\t%-16s%-16s\t\t%d\t%d\t\n", m->start,ajStrGetPtr(m->cod), ajStrGetPtr(m->pat), m->cut1,m->cut2); } } /* I am not sure what fragments are doing so I left it in ...*/ /* used in the report tail in restrict - restover does much the same */ if(m->cut3 || m->cut4) { if(frags) fa[fn++] = m->cut3; /* ajFmtPrintF(*outf,"%d\t%d",m->cut3,m->cut4);*/ } ajStrDel(&overhead); embMatMatchDel(&m); } if(frags) { ajSortIntInc(fa,fn); ajFmtPrintF(outf,"\n\nFragment lengths:\n"); if(!fn || (fn==1 && plasmid)) ajFmtPrintF(outf," %d\n",end-begin+1); else { last = -1; fb = 0; for(i=0;i<fn;++i) { if((c=fa[i])!=last) fa[fb++]=c; last = c; } fn = fb; /* Calc lengths */ for(i=0;i<fn-1;++i) fx[fc++] = fa[i+1]-fa[i]; if(!plasmid) { fx[fc++] = fa[0]-begin+1; fx[fc++] = end-fa[fn-1]; } else fx[fc++] = (fa[0]-begin+1)+(end-fa[fn-1]); ajSortIntDec(fx,fc); for(i=0;i<fc;++i) ajFmtPrintF(outf," %d\n",fx[i]); } AJFREE(fa); AJFREE(fx); } ajStrDel(&ps); if(html) ajFmtPrintF(outf,"</table>\n"); return; }
int main(int argc, char **argv) { AjPSeqall queryseqs; AjPSeqset targetseqs; AjPSeq queryseq; const AjPSeq targetseq; AjPStr queryaln = 0; AjPStr targetaln = 0; AjPFile errorf; AjBool show = ajFalse; const char *queryseqc; const char *targetseqc; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; ajint *compass = NULL; float *path = NULL; float gapopen; float gapextend; float score; float minscore; ajuint j, k; ajint querystart = 0; ajint targetstart = 0; ajint queryend = 0; ajint targetend = 0; ajint width = 0; AjPTable kmers = 0; ajint wordlen = 6; ajint oldmax = 0; ajint newmax = 0; ajuint ntargetseqs; ajuint nkmers; AjPAlign align = NULL; EmbPWordMatch maxmatch; /* match with maximum score */ /* Cursors for the current sequence being scanned, ** i.e., until which location it was scanned. ** Separate cursor/location entries for each sequence in the seqset. */ ajuint* lastlocation; EmbPWordRK* wordsw = NULL; AjPList* matchlist = NULL; embInit("supermatcher", argc, argv); matrix = ajAcdGetMatrixf("datafile"); queryseqs = ajAcdGetSeqall("asequence"); targetseqs= ajAcdGetSeqset("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); wordlen = ajAcdGetInt("wordlen"); align = ajAcdGetAlign("outfile"); errorf = ajAcdGetOutfile("errorfile"); width = ajAcdGetInt("width"); /* width for banded Smith-Waterman */ minscore = ajAcdGetFloat("minscore"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); embWordLength(wordlen); /* seqset sequence is the reference sequence for SAM format */ ajAlignSetRefSeqIndx(align, 1); ajSeqsetTrim(targetseqs); ntargetseqs = ajSeqsetGetSize(targetseqs); AJCNEW0(matchlist, ntargetseqs); /* get tables of words */ for(k=0;k<ntargetseqs;k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); embWordGetTable(&kmers, targetseq); ajDebug("Number of distinct kmers found so far: %d\n", ajTableGetLength(kmers)); } AJCNEW0(lastlocation, ntargetseqs); if(ajTableGetLength(kmers)<1) ajErr("no kmers found"); nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs); while(ajSeqallNext(queryseqs,&queryseq)) { ajSeqTrim(queryseq); queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq)); ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq)); for(k=0;k<ntargetseqs;k++) { lastlocation[k]=0; matchlist[k] = ajListstrNew(); } embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs, (const EmbPWordRK*)wordsw, wordlen, nkmers, matchlist, lastlocation, ajFalse); for(k=0;k<ajSeqsetGetSize(targetseqs);k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq)); if(ajListGetLength(matchlist[k])==0) { ajFmtPrintF(errorf, "No wordmatch start points for " "%s vs %s. No alignment\n", ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq)); embWordMatchListDelete(&matchlist[k]); continue; } /* only the maximum match is used as seed * (if there is more than one location with the maximum match * only the first one is used) * TODO: we should add a new option to make above limit optional */ maxmatch = embWordMatchFirstMax(matchlist[k]); supermatcher_findendpoints(maxmatch,targetseq, queryseq, &targetstart, &querystart, &targetend, &queryend); targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq)); queryseqc = ajSeqGetSeqC(queryseq); targetseqc = ajSeqGetSeqC(targetseq); ajStrAssignC(&queryaln,""); ajStrAssignC(&targetaln,""); ajDebug("++ %S v %S start:%d %d end:%d %d\n", ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq), targetstart, querystart, targetend, queryend); newmax = (targetend-targetstart+2)*width; if(newmax > oldmax) { AJCRESIZE0(path,oldmax,newmax); AJCRESIZE0(compass,oldmax,newmax); oldmax=newmax; ajDebug("++ memory re/allocation for path/compass arrays" " to size: %d\n", newmax); } else { AJCSET0(path,newmax); AJCSET0(compass,newmax); } ajDebug("Calling embAlignPathCalcSWFast " "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n", querystart, queryend, (queryend - querystart + 1), ajSeqGetLen(queryseq), targetstart, targetend, (targetend - targetstart + 1), ajSeqGetLen(targetseq), width); score = embAlignPathCalcSWFast(&targetseqc[targetstart], &queryseqc[querystart], targetend-targetstart+1, queryend-querystart+1, 0,width, gapopen,gapextend, path,sub,cvt, compass,show); if(score>minscore) { embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend, targetseq,queryseq, &targetaln,&queryaln, targetend-targetstart+1, queryend-querystart+1, 0,width, &targetstart,&querystart); if(!ajAlignFormatShowsSequences(align)) { ajAlignDefineCC(align, ajStrGetPtr(targetaln), ajStrGetPtr(queryaln), ajSeqGetNameC(targetseq), ajSeqGetNameC(queryseq)); ajAlignSetScoreR(align, score); } else { ajDebug(" queryaln:%S \ntargetaln:%S\n", queryaln,targetaln); embAlignReportLocal(align, queryseq, targetseq, queryaln, targetaln, querystart, targetstart, gapopen, gapextend, score, matrix, 1 + ajSeqGetOffset(queryseq), 1 + ajSeqGetOffset(targetseq) ); } ajAlignWrite(align); ajAlignReset(align); } ajStrDel(&targetaln); embWordMatchListDelete(&matchlist[k]); } ajStrDel(&queryaln); } for(k=0;k<nkmers;k++) { AJFREE(wordsw[k]->seqindxs); AJFREE(wordsw[k]->nSeqMatches); for(j=0;j<wordsw[k]->nseqs;j++) AJFREE(wordsw[k]->locs[j]); AJFREE(wordsw[k]->nnseqlocs); AJFREE(wordsw[k]->locs); AJFREE(wordsw[k]); } embWordFreeTable(&kmers); if(!ajAlignFormatShowsSequences(align)) ajMatrixfDel(&matrix); AJFREE(path); AJFREE(compass); AJFREE(kmers); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&queryseqs); ajSeqDel(&queryseq); ajSeqsetDel(&targetseqs); ajFileClose(&errorf); embExit(); return 0; }
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info) { ajint n; ajint maxlen; ajint len; char **seqs; const AjPSeq seq = NULL; ajint i=0; const AjPStr fmt=NULL; const char *p=NULL; char c='\0'; /* char *q=NULL; AjPSelexseq sqdata=NULL; AjPSelexdata sdata=NULL; */ ajint cnt=0; info->name = NULL; info->rf=NULL; info->cs=NULL; info->desc=NULL; info->acc=NULL; info->au=NULL; info->flags=0; AjPStr tmpstr = NULL; ajSeqsetFill(seqset); fmt = ajSeqsetGetFormat(seqset); n = ajSeqsetGetSize(seqset); ajSeqsetFmtUpper(seqset); maxlen = ajSeqsetGetLen(seqset); /* First allocate and copy sequences */ AJCNEW0(seqs,n); for(i=0; i<n; ++i) { seqs[i] = ajCharNewRes(maxlen+1); strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i))); } info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n); for(i=0; i<n; ++i) { info->sqinfo[i].flags = 0; strcpy(info->sqinfo[i].name,""); strcpy(info->sqinfo[i].id,""); strcpy(info->sqinfo[i].acc,""); strcpy(info->sqinfo[i].desc,""); info->sqinfo[i].len = 0; info->sqinfo[i].start = 0; info->sqinfo[i].stop = 0; info->sqinfo[i].olen = 0; info->sqinfo[i].type = 0; info->sqinfo[i].ss = NULL; info->sqinfo[i].sa =NULL; } AJCNEW0(info->wgt,n); for(i=0; i<n; ++i) { info->sqinfo[i].flags = 0; info->wgt[i] = ajSeqsetGetseqWeight(seqset,i); } info->nseq = n; info->alen = maxlen; for(i=0; i<n; ++i) { seq = ajSeqsetGetseqSeq(seqset,i); if((len=ajStrGetLen(ajSeqGetNameS(seq)))) { if(len>= SQINFO_NAMELEN) len = SQINFO_NAMELEN - 1; ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len); strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr)); info->sqinfo[i].flags |= SQINFO_ID; strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr)); info->sqinfo[i].flags |= SQINFO_NAME; } if((len=ajStrGetLen(ajSeqGetAccS(seq)))) { if(len>= SQINFO_NAMELEN) len = SQINFO_NAMELEN - 1; ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len); strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr)); info->sqinfo[i].flags |= SQINFO_ACC; } } seq = ajSeqsetGetseqSeq(seqset,0); info->cs = ajCharNewS(ajSeqGetSeqS(seq)); info->name = ajCharNewS(ajSeqGetNameS(seq)); info->acc = ajCharNewS(ajSeqGetAccS(seq)); info->desc = ajCharNewS(ajSeqGetDescS(seq)); info->rf = ajCharNewS(ajSeqGetSeqS(seq)); /* info->rf = ajCharNewS(seq); len = ajStrGetLen(seq->Selexdata->name); info->name = ajCharNewRes(len+1); strcpy(info->name,ajStrGetPtr(seq->Selexdata->name)); len = ajStrGetLen(seq->Selexdata->de); info->desc = ajCharNewRes(len+1); sdata = seq->Selexdata; strcpy(info->desc,ajStrGetPtr(sdata->de)); len = ajStrGetLen(sdata->ac); info->acc = ajCharNewRes(len+1); strcpy(info->acc,ajStrGetPtr(sdata->ac)); len = ajStrGetLen(sdata->au); info->au = ajCharNewRes(len+1); strcpy(info->au,ajStrGetPtr(sdata->au)); if(sdata->tc[0] || sdata->tc[1]) { info->flags |= AINFO_TC; info->tc1 = sdata->tc[0]; info->tc2 = sdata->tc[1]; } if(sdata->nc[0] || sdata->nc[1]) { info->flags |= AINFO_NC; info->nc1 = sdata->nc[0]; info->nc2 = sdata->nc[1]; } if(sdata->ga[0] || sdata->ga[1]) { info->flags |= AINFO_GA; info->ga1 = sdata->ga[0]; info->ga2 = sdata->ga[1]; } for(i=0;i<n;++i) { seq = ajSeqsetGetseqSeq(seqset,i); sqdata = seq->Selexdata->sq; if((len=ajStrGetLen(sqdata->name))) { if(len<64) strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name)); else strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63); info->sqinfo[i].name[63]='\0'; info->sqinfo[i].flags |= SQINFO_NAME; } / * if((len=ajStrGetLen(sqdata->id))) { if(len<64) strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id)); else strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63); info->sqinfo[i].id[63]='\0'; info->sqinfo[i].flags |= SQINFO_ID; } * / strcpy(info->sqinfo[i].id,info->sqinfo[i].name); info->sqinfo[i].flags |= SQINFO_ID; if((len=ajStrGetLen(sqdata->ac))) { if(len<64) strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac)); else strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63); info->sqinfo[i].acc[63]='\0'; info->sqinfo[i].flags |= SQINFO_ACC; } if((len=ajStrGetLen(sqdata->de))) { if(len<127) strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de)); else strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127); info->sqinfo[i].desc[127]='\0'; info->sqinfo[i].flags |= SQINFO_DESC; } if(sqdata->start || sqdata->stop || sqdata ->len) { info->sqinfo[i].start = sqdata->start; info->sqinfo[i].stop = sqdata->stop; info->sqinfo[i].olen = sqdata->len; info->sqinfo[i].flags |= SQINFO_START; info->sqinfo[i].flags |= SQINFO_STOP; info->sqinfo[i].flags |= SQINFO_OLEN; } if(ajStrGetLen(seq->Selexdata->ss)) { info->sqinfo[i].ss = ajCharNewRes(maxlen+1); p = ajStrGetPtr(seq->Selexdata->ss); q = info->sqinfo[i].ss; while((c==*p)) { if(c=='.' || c==' ' || c=='_' || c=='-') *q++ = c; ++p; } *q = '\0'; info->sqinfo[i].flags |= SQINFO_SS; } } } / * } */ for(i=0; i<n; ++i) { info->sqinfo[i].type = kOtherSeq; if(ajSeqsetIsDna(seqset)) info->sqinfo[i].type = kDNA; if(ajSeqsetIsRna(seqset)) info->sqinfo[i].type = kRNA; if(ajSeqsetIsProt(seqset)) info->sqinfo[i].type = kAmino; info->sqinfo[i].flags |= SQINFO_TYPE; seq = ajSeqsetGetseqSeq(seqset,i); p = ajSeqGetSeqC(seq); cnt = 0; while((c=*p)) { if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~')) ++cnt; ++p; } info->sqinfo[i].len = cnt; info->sqinfo[i].flags |= SQINFO_LEN; } *retseqs = seqs; ajStrDel(&tmpstr); return; }
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }
int main(int argc, char **argv) { // initialize EMBASSY info embInitPV("kcentroidfold", argc, argv, "KBWS", "1.0.9"); // soap driver and parameter object struct soap soap; struct ns1__centroidfoldInputParams params; char* jobid; AjPSeqall seqall; // input sequence AjPFile outf; // outfile AjPStr goutfile; // graph file name AjPFile goutf; // graph file handle AjPSeq seq; AjPStr inseq= NULL; AjPStr substr; AjPStr engine; // CONTRAfold, McCaskill, pfold or AUX ajint gamma; // get input/output info seqall= ajAcdGetSeqall("seqall"); outf= ajAcdGetOutfile("outfile"); goutfile= ajAcdGetString("goutfile"); // get parameters engine= ajAcdGetString("engine"); gamma= ajAcdGetInt("gamma"); // set parameters params.model= ajCharNewS(engine); params.gamma= gamma; while (ajSeqallNext(seqall, &seq)) { // initialize soap_init(&soap); inseq= NULL; // convert sequence data to EMBOSS string as fasta format ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); // convert EMBOSS string to char* in C char* in0; in0= ajCharNewS(inseq); // submit query via SOAP and get job ID if (soap_call_ns1__runCentroidfold(&soap, NULL, NULL, in0, ¶ms, &jobid) == SOAP_OK) { } else { soap_print_fault(&soap, stderr); } // polling int check = 0; while (check == 0) { if (soap_call_ns1__checkStatus(&soap, NULL, NULL, jobid, &check) == SOAP_OK) { } else { soap_print_fault(&soap, stderr); } sleep(3); } // get result (sequence alignment text data) char* result; if(soap_call_ns1__getMultiResult(&soap, NULL, NULL, jobid, "out", &result) == SOAP_OK) { // convert result from C char* to EMBOSS string object substr= ajStrNewC(result); // output result (EMBOSS string) to file or STDOUT via EMBOSS ajFmtPrintF(outf, "%S\n", substr); } else { soap_print_fault(&soap, stderr); } // get result (image file) char* image_url; if(soap_call_ns1__getMultiResult(&soap, NULL, NULL, jobid, "png", &image_url) == SOAP_OK) { goutf= ajFileNewOutNameS(goutfile); if (!goutf) { // can not open image output file ajFmtError("Problem writing out image file"); embExitBad(); } if (!gHttpGetBinC(image_url, &goutf)) { // can not download image file ajFmtError("Problem downloading image file"); embExitBad(); } } else { soap_print_fault(&soap, stderr); } } // destruct SOAP driver soap_destroy(&soap); soap_end(&soap); soap_done(&soap); // write output file and destruct outfile object ajFileClose(&outf); // destruct EMBOSS object ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); ajStrDel(&engine); // exit embExit(); return 0; }
int main(int argc, char *argv[]) { embInitPV("gaminoinfo", argc, argv, "GEMBASSY", "1.0.1"); struct soap soap; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; char *in0; char *result; AjPFile outf = NULL; seqall = ajAcdGetSeqall("sequence"); outf = ajAcdGetOutfile("outfile"); while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(!ajStrGetLen(seqid)) ajStrAssignS(&seqid, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); in0 = ajCharNewS(inseq); if(soap_call_ns1__amino_USCOREinfo( &soap, NULL, NULL, in0, &result ) == SOAP_OK) { ajFmtPrintF(outf, "Sequence: %S\n%s\n", seqid, result); } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); embExit(); return 0; }
static void primersearch_scan_seq(const Primer primdata, const AjPSeq seq, AjBool reverse) { AjPStr seqstr = NULL; AjPStr revstr = NULL; AjPStr seqname = NULL; ajuint fhits = 0; ajuint rhits = 0; AjPList fhits_list = NULL; AjPList rhits_list = NULL; /* initialise variables for search */ ajStrAssignC(&seqname,ajSeqGetNameC(seq)); ajStrAssignS(&seqstr, ajSeqGetSeqS(seq)); ajStrAssignS(&revstr, ajSeqGetSeqS(seq)); ajStrFmtUpper(&seqstr); ajStrFmtUpper(&revstr); ajSeqstrReverse(&revstr); fhits_list = ajListNew(); rhits_list = ajListNew(); if(!reverse) { /* test OligoA against forward sequence, and OligoB against reverse */ embPatFuzzSearch(primdata->forward->type, ajSeqGetBegin(seq), primdata->forward->patstr, seqname, seqstr, fhits_list, primdata->forward->len, primdata->forward->mm, primdata->forward->amino, primdata->forward->carboxyl, primdata->forward->buf, primdata->forward->off, primdata->forward->sotable, primdata->forward->solimit, primdata->forward->re, primdata->forward->skipm, &fhits, primdata->forward->real_len, &(primdata->forward->tidy)); if(fhits) embPatFuzzSearch(primdata->reverse->type, ajSeqGetBegin(seq), primdata->reverse->patstr, seqname, revstr, rhits_list, primdata->reverse->len, primdata->reverse->mm, primdata->reverse->amino, primdata->reverse->carboxyl, primdata->reverse->buf, primdata->reverse->off, primdata->reverse->sotable, primdata->reverse->solimit, primdata->reverse->re, primdata->reverse->skipm, &rhits, primdata->reverse->real_len, &(primdata->reverse->tidy)); } else { /*test OligoB against forward sequence, and OligoA against reverse */ embPatFuzzSearch(primdata->reverse->type, ajSeqGetBegin(seq), primdata->reverse->patstr, seqname, seqstr, fhits_list, primdata->reverse->len, primdata->reverse->mm, primdata->reverse->amino, primdata->reverse->carboxyl, primdata->reverse->buf, primdata->reverse->off, primdata->reverse->sotable, primdata->reverse->solimit, primdata->reverse->re, primdata->reverse->skipm, &fhits, primdata->reverse->real_len, &(primdata->reverse->tidy)); if(fhits) embPatFuzzSearch(primdata->forward->type, ajSeqGetBegin(seq), primdata->forward->patstr, seqname, revstr, rhits_list, primdata->forward->len, primdata->forward->mm, primdata->forward->amino, primdata->forward->carboxyl, primdata->forward->buf, primdata->forward->off, primdata->forward->sotable, primdata->forward->solimit, primdata->forward->re, primdata->forward->skipm, &rhits, primdata->forward->real_len, &(primdata->forward->tidy)); } if(fhits && rhits) /* get amplimer length(s) and write out the hit */ primersearch_store_hits(primdata, fhits_list, rhits_list, seq, reverse); /* tidy up */ primersearch_clean_hitlist(&fhits_list); primersearch_clean_hitlist(&rhits_list); ajStrDel(&seqstr); ajStrDel(&revstr); ajStrDel(&seqname); return; }
int main(int argc, char *argv[]) { embInitPV("gpalindrome", argc, argv, "GEMBASSY", "1.0.3"); struct soap soap; struct ns1__palindromeInputParams params; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; ajint shortest = 0; ajint loop = 0; AjBool gtmatch = 0; char *in0; char *result; AjPFile outf = NULL; seqall = ajAcdGetSeqall("sequence"); shortest = ajAcdGetInt("shortest"); loop = ajAcdGetInt("loop"); gtmatch = ajAcdGetBoolean("gtmatch"); outf = ajAcdGetOutfile("outfile"); params.shortest = shortest; params.loop = loop; params.gtmatch = gtmatch; params.output = "f"; while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); ajStrAssignS(&seqid, ajSeqGetAccS(seq)); in0 = ajCharNewS(inseq); if(soap_call_ns1__palindrome( &soap, NULL, NULL, in0, ¶ms, &result ) == SOAP_OK) { ajFmtPrintF(outf, "Sequence: %S\n", seqid); if(!gFileOutURLC(result, &outf)) { ajDie("File downloading error from:\n%s\n", result); embExitBad(); } } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&seqid); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPFile primfile; AjPStr rdline = NULL; Primer primdata; AjPStrTok handle = NULL; AjPList primList = NULL; embInit("stssearch", argc, argv); primfile = ajAcdGetInfile("infile"); out = ajAcdGetOutfile("outfile"); seqall = ajAcdGetSeqall("seqall"); while(ajReadlineTrim(primfile, &rdline)) { if(ajStrGetCharFirst(rdline) == '#') continue; if(ajStrSuffixC(rdline, "..")) continue; AJNEW(primdata); primdata->Name = NULL; primdata->Oligoa = NULL; primdata->Oligob = NULL; handle = ajStrTokenNewC(rdline, " \t"); ajStrTokenNextParse(&handle, &primdata->Name); if(!(nprimers % 1000)) ajDebug("Name [%d]: '%S'\n", nprimers, primdata->Name); ajStrTokenNextParse(&handle, &primdata->Oligoa); ajStrFmtUpper(&primdata->Oligoa); primdata->Prima = ajRegComp(primdata->Oligoa); ajStrTokenNextParse(&handle, &primdata->Oligob); ajStrFmtUpper(&primdata->Oligob); primdata->Primb = ajRegComp(primdata->Oligob); ajStrTokenDel(&handle); if(!nprimers) primList = ajListNew(); ajListPushAppend(primList, primdata); nprimers++; } if(!nprimers) ajFatal("No primers read\n"); ajDebug("%d primers read\n", nprimers); while(ajSeqallNext(seqall, &seq)) { ajSeqFmtUpper(seq); ajStrAssignS(&seqstr, ajSeqGetSeqS(seq)); ajStrAssignS(&revstr, ajSeqGetSeqS(seq)); ajSeqstrReverse(&revstr); ajDebug("Testing: %s\n", ajSeqGetNameC(seq)); ntests = 0; ajListMap(primList, stssearch_primTest, NULL); } ajFileClose(&out); ajSeqallDel(&seqall); ajSeqDel(&seq); ajFileClose(&out); ajStrDel(&revstr); ajStrDel(&seqstr); ajFileClose(&primfile); ajListMap(primList, stssearch_primDel, NULL); ajListFree(&primList); ajStrDel(&rdline); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall nucseq; /* input nucleic sequences */ AjPSeqset protseq; /* input aligned protein sequences */ AjPSeqout seqout; AjPSeq nseq; /* next nucleic sequence to align */ const AjPSeq pseq; /* next protein sequence use in alignment */ AjPTrn trnTable; AjPSeq pep; /* translation of nseq */ AjPStr tablelist; ajint table; AjPSeqset outseqset; /* set of aligned nucleic sequences */ ajint proteinseqcount = 0; AjPStr degapstr = NULL; /* used to check if it matches with START removed */ AjPStr degapstr2 = NULL; AjPStr codon = NULL; /* holds temporary codon to check if is START */ char aa; /* translated putative START codon */ ajint type; /* returned type of the putative START codon */ /* start position of guide protein in translation */ ajlong pos = 0; AjPSeq newseq = NULL; /* output aligned nucleic sequence */ ajint frame; embInit("tranalign", argc, argv); nucseq = ajAcdGetSeqall("asequence"); protseq = ajAcdGetSeqset("bsequence"); tablelist = ajAcdGetListSingle("table"); seqout = ajAcdGetSeqoutset("outseq"); outseqset = ajSeqsetNew(); degapstr = ajStrNew(); /* initialise the translation table */ ajStrToInt(tablelist, &table); trnTable = ajTrnNewI(table); ajSeqsetFill(protseq); while(ajSeqallNext(nucseq, &nseq)) { if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL) ajErr("No guide protein sequence available for " "nucleic sequence %S", ajSeqGetNameS(nseq)); ajDebug("Aligning %S and %S\n", ajSeqGetNameS(nseq), ajSeqGetNameS(pseq)); /* get copy of pseq string with no gaps */ ajStrAssignS(°apstr, ajSeqGetSeqS(pseq)); ajStrRemoveGap(°apstr); /* ** for each translation frame look for subset of pep that ** matches pseq */ for(frame = 1; frame <4; frame++) { ajDebug("trying frame %d\n", frame); pep = ajTrnSeqOrig(trnTable, nseq, frame); degapstr2 = ajStrNew(); ajStrAssignRef(°apstr2, degapstr); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr); /* ** we might have a START codon that should be translated as 'M' ** we need to check if there is a match after a possible START ** codon */ if(pos == -1 && ajStrGetLen(degapstr) > 1 && (ajStrGetPtr(degapstr)[0] == 'M' || ajStrGetPtr(degapstr)[0] == 'm')) { /* see if pep minus the first character is a match */ ajStrCutStart(°apstr2, 1); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); /* ** pos is >= 1 if we have a match that is after the first ** residue */ if(pos >= 1) { /* point back at the putative START Methionine */ pos--; /* test if first codon is a START */ codon = ajStrNew(); ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), (pos*3)+frame-1, (pos*3)+frame+2); type = ajTrnCodonstrTypeS(trnTable, codon, &aa); if(type != 1) { /* first codon is not a valid START, force a mismatch */ pos = -1; } ajStrDel(&codon); } else { /* force 'pos == 0' to be treated as a mismatch */ pos = -1; } } ajStrDel(°apstr2); ajSeqDel(&pep); if(pos != -1) break; } if(pos == -1) ajErr("Guide protein sequence %S not found in nucleic sequence %S", ajSeqGetNameS(pseq), ajSeqGetNameS(nseq)); else { ajDebug("got a match with frame=%d\n", frame); /* extract the coding region of nseq with gaps */ newseq = ajSeqNew(); ajSeqSetNuc(newseq); ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq)); ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq)); tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1); /* output the gapped nucleic sequence */ ajSeqsetApp(outseqset, newseq); ajSeqDel(&newseq); } ajStrRemoveWhiteExcess(°apstr); } ajSeqoutWriteSet(seqout, outseqset); ajSeqoutClose(seqout); ajTrnDel(&trnTable); ajSeqsetDel(&outseqset); ajStrDel(°apstr); ajStrDel(°apstr2); ajSeqallDel(&nucseq); ajSeqDel(&nseq); ajSeqoutDel(&seqout); ajSeqsetDel(&protseq); ajStrDel(&tablelist); embExit(); return 0; }
int main(int argc, char *argv[]) { embInitPV("gcgr", argc, argv, "GEMBASSY", "1.0.1"); struct soap soap; struct ns1__cgrInputParams params; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; ajint width = 0; AjPFile outf = NULL; AjPStr filename = NULL; AjPStr outfname = NULL; AjPStr format = NULL; ajint i; char *in0; char *result; seqall = ajAcdGetSeqall("sequence"); width = ajAcdGetInt("width"); filename = ajAcdGetString("goutfile"); format = ajAcdGetString("format"); params.width = width; i = 0; while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetAccS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); ajStrAssignS(&seqid, ajSeqGetAccS(seq)); in0 = ajCharNewS(inseq); if(soap_call_ns1__cgr( &soap, NULL, NULL, in0, ¶ms, &result ) == SOAP_OK) { ++i; outfname = ajStrNewS(ajFmtStr("%S.%d.%S", filename, i, format)); outf = ajFileNewOutNameS(outfname); if(!outf) { ajDie("File open error\n"); } if(!ajStrMatchC(format, "png")) { if(!gHttpConvertC(result, &outf, ajStrNewC("png"), format)) { ajDie("File downloading error from:\n%s\n", result); } else { ajFmtPrint("Created %S\n", outfname); } } else { if(!gHttpGetBinC(result, &outf)) { ajDie("File downloading error from:\n%s\n", result); } else { ajFmtPrint("Created %S\n", outfname); } } ajStrDel(&outfname); } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&filename); embExit(); }
static void extractfeat_GetRegionPad(const AjPSeq seq, AjPStr *featstr, ajint start, ajint end, AjBool sense, AjBool beginning) { ajint tmp; ajint pad; AjPStr result; ajDebug("In extractfeat_GetRegionPad start=%d, end=%d\n", start, end); result = ajStrNew(); if(start > end) return; if(start < 0) { pad = -start; if(ajSeqIsNuc(seq)) ajStrAppendCountK(&result, 'N', pad); else ajStrAppendCountK(&result, 'X', pad); start = 0; } if(end > (ajint) ajSeqGetLen(seq)-1) tmp = ajSeqGetLen(seq)-1; else tmp = end; if(start <= (ajint) ajSeqGetLen(seq) && tmp >= 0) { ajDebug("Get subsequence %d-%d\n", start, tmp); ajStrAppendSubS(&result, ajSeqGetSeqS(seq), start, tmp); ajDebug("result=%S\n", result); } if(end > (ajint) ajSeqGetLen(seq)-1) { pad = end - ajSeqGetLen(seq)+1; if(ajSeqIsNuc(seq)) ajStrAppendCountK(&result, 'N', pad); else ajStrAppendCountK(&result, 'X', pad); ajDebug("result=%S\n", result); } /* if feature was in reverse sense, then get reverse complement */ if(!sense) { ajDebug("get reverse sense of subsequence\n"); ajSeqstrReverse(&result); ajDebug("result=%S\n", result); } if(beginning) { ajDebug("Prepend to featstr: %S\n", result); ajStrInsertS(featstr, 0, result); } else { ajDebug("Append to featstr: %S\n", result); ajStrAppendS(featstr, result); } ajDebug("featstr=%S\n", *featstr); ajStrDel(&result); return; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeqout seqout; AjPSeq seq = NULL; AjPStr str = NULL; AjPStr desc = NULL; ajint tail3; ajint tail5 = 0; ajint minlength; ajint mismatches; AjBool reverse; AjBool fiveprime; AjBool cvttolower; embInit("trimest", argc, argv); seqall = ajAcdGetSeqall("sequence"); seqout = ajAcdGetSeqoutall("outseq"); minlength = ajAcdGetInt("minlength"); mismatches = ajAcdGetInt("mismatches"); reverse = ajAcdGetBoolean("reverse"); fiveprime = ajAcdGetBoolean("fiveprime"); cvttolower = ajAcdGetToggle("tolower"); str = ajStrNew(); while(ajSeqallNext(seqall, &seq)) { /* get sequence description */ ajStrAssignS(&desc, ajSeqGetDescS(seq)); /* get positions to cut in 5' poly-T and 3' poly-A tails */ if(fiveprime) tail5 = trimest_get_tail(seq, 5, minlength, mismatches); tail3 = trimest_get_tail(seq, 3, minlength, mismatches); /* get a COPY of the sequence string */ ajStrAssignS(&str, ajSeqGetSeqS(seq)); /* cut off longest of 3' or 5' tail */ if(tail5 > tail3) { /* if 5' poly-T tail, then reverse the sequence */ ajDebug("Tail=%d\n", tail5); if(cvttolower) trimest_tolower(&str, 0, tail5-1); else ajStrKeepRange(&str, tail5, ajSeqGetLen(seq)-1); ajStrAppendC(&desc, " [poly-T tail removed]"); } else if(tail3 > tail5) { /* remove 3' poly-A tail */ ajDebug("Tail=%d\n", tail3); if(cvttolower) trimest_tolower(&str, ajSeqGetLen(seq)-tail3, ajSeqGetLen(seq)); else ajStrKeepRange(&str, 0, ajSeqGetLen(seq)-tail3-1); ajStrAppendC(&desc, " [poly-A tail removed]"); } /* write sequence out */ ajSeqAssignSeqS(seq, str); /* reverse complement if poly-T found */ if(tail5 > tail3 && reverse) { ajSeqReverseForce(seq); ajStrAppendC(&desc, " [reverse complement]"); } /* set description */ ajSeqAssignDescS(seq, desc); ajSeqoutWriteSeq(seqout, seq); } ajSeqoutClose(seqout); ajStrDel(&str); ajStrDel(&desc); ajSeqallDel(&seqall); ajSeqDel(&seq); ajSeqoutDel(&seqout); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeq seq; AjPGraph graph = 0; AjPFile outf = NULL; AjPFile file = NULL; AjPStr buffer = NULL; float twist[4][4][4]; float roll[4][4][4]; float tilt[4][4][4]; float rbend; float rcurve; float bendscale; float curvescale; float twistsum = (float) 0.0; float pi = (float) 3.14159; float pifac = (pi/(float) 180.0); float pi2 = pi/(float) 2.0; ajint *iseq = NULL; float *x; float *y; float *xave; float *yave; float *curve; float *bend; const char *ptr; ajint i; ajint ii; ajint k; ajint j; char residue[2]; float maxbend; float minbend; float bendfactor; float maxcurve; float mincurve; float curvefactor; float fxp; float fyp; float yincr; float yy1; ajint ixlen; ajint iylen; ajint ixoff; ajint iyoff; float ystart; float defheight; float currentheight; ajint count; ajint portrait = 0; ajint title = 0; ajint numres; ajint ibeg; ajint iend; ajint ilen; AjPStr sstr = NULL; ajint ipos; float dx; float dy; float rxsum; float rysum; float yp1; float yp2; double td; embInit("banana", argc, argv); seq = ajAcdGetSeq("sequence"); file = ajAcdGetDatafile("anglesfile"); outf = ajAcdGetOutfile("outfile"); graph = ajAcdGetGraph("graph"); numres = ajAcdGetInt("residuesperline"); ibeg = ajSeqGetBegin(seq); iend = ajSeqGetEnd(seq); ajStrAssignSubS(&sstr, ajSeqGetSeqS(seq), ibeg-1, iend-1); ilen = ajStrGetLen(sstr); AJCNEW0(iseq,ilen+1); AJCNEW0(x,ilen+1); AJCNEW0(y,ilen+1); AJCNEW0(xave,ilen+1); AJCNEW0(yave,ilen+1); AJCNEW0(curve,ilen+1); AJCNEW0(bend,ilen+1); ptr= ajStrGetPtr(sstr); for(ii=0;ii<ilen;ii++) { if(*ptr=='A' || *ptr=='a') iseq[ii+1] = 0; else if(*ptr=='T' || *ptr=='t') iseq[ii+1] = 1; else if(*ptr=='G' || *ptr=='g') iseq[ii+1] = 2; else if(*ptr=='C' || *ptr=='c') iseq[ii+1] = 3; else ajErr("%c is not an ATCG hence not valid",*ptr); ptr++; } if(!file) ajErr("Banana failed to open angle file"); ajReadline(file,&buffer); /* 3 junk lines */ ajReadline(file,&buffer); ajReadline(file,&buffer); for(k=0;k<4;k++) for(ii=0;ii<4;ii++) { if(ajReadline(file,&buffer)) { sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f", &twist[ii][0][k],&twist[ii][1][k],&twist[ii][2][k], &twist[ii][3][k]); } else ajErr("Error reading angle file"); for(j=0;j<4;j++) twist[ii][j][k] *= pifac; } for(k=0;k<4;k++) for(ii=0;ii<4;ii++) if(ajReadline(file,&buffer)) { sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&roll[ii][0][k], &roll[ii][1][k],&roll[ii][2][k],&roll[ii][3][k]); } else ajErr("Error reading angle file"); for(k=0;k<4;k++) for(ii=0;ii<4;ii++) if(ajReadline(file,&buffer)) sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&tilt[ii][0][k], &tilt[ii][1][k],&tilt[ii][2][k],&tilt[ii][3][k]); else ajErr("Error reading angle file"); if(ajReadline(file,&buffer)) sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&rbend,&rcurve, &bendscale,&curvescale); else ajErr("Error reading angle file"); ajFileClose(&file); ajStrDel(&buffer); for(ii=1;ii<ilen-1;ii++) { twistsum += twist[iseq[ii]][iseq[ii+1]][iseq[ii+2]]; dx = (roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum)) + (tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum-pi2)); dy = roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum) + tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum-pi2); x[ii+1] = x[ii]+dx; y[ii+1] = y[ii]+dy; } for(ii=6;ii<ilen-6;ii++) { rxsum = 0.0; rysum = 0.0; for(k=-4;k<=4;k++) { rxsum+=x[ii+k]; rysum+=y[ii+k]; } rxsum+=(x[ii+5]*(float)0.5); rysum+=(y[ii+5]*(float)0.5); rxsum+=(x[ii-5]*(float)0.5); rysum+=(y[ii-5]*(float)0.5); xave[ii] = rxsum*(float)0.1; yave[ii] = rysum*(float)0.1; } for(i=(ajint)rbend+1;i<=ilen-(ajint)rbend-1;i++) { td = sqrt(((x[i+(ajint)rbend]-x[i-(ajint)rbend])* (x[i+(ajint)rbend]-x[i-(ajint)rbend])) + ((y[i+(ajint)rbend]-y[i-(ajint)rbend])* (y[i+(ajint)rbend]-y[i-(ajint)rbend]))); bend[i] = (float) td; bend[i]*=bendscale; } for(i=(ajint)rcurve+6;i<=ilen-(ajint)rcurve-6;i++) { td = sqrt(((xave[i+(ajint)rcurve]- xave[i-(ajint)rcurve])*(xave[i+(ajint)rcurve]- xave[i-(ajint)rcurve]))+ ((yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])* (yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve]))); curve[i] = (float) td; } if(outf) { ajFmtPrintF(outf,"Base Bend Curve\n"); ptr = ajStrGetPtr(sstr); for(ii=1;ii<=ilen;ii++) { ajFmtPrintF(outf,"%c %6.1f %6.1f\n", *ptr, bend[ii], curve[ii]); ptr++; } ajFileClose(&outf); } if(graph) { maxbend = minbend = 0.0; maxcurve = mincurve = 0.0; for(ii=1;ii<=ilen;ii++) { if(bend[ii] > maxbend) maxbend = bend[ii]; if(bend[ii] < minbend) minbend = bend[ii]; if(curve[ii] > maxcurve) maxcurve = curve[ii]; if(curve[ii] < mincurve) mincurve = curve[ii]; } ystart = 75.0; ajGraphAppendTitleS(graph, ajSeqGetUsaS(seq)); ajGraphicsSetPagesize(960, 768); ajGraphOpenWin(graph,(float)-1.0, (float)numres+(float)10.0, (float)0.0, ystart+(float)5.0); ajGraphicsGetParamsPage(&fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff); if(portrait) { ixlen = 768; iylen = 960; } else { ixlen = 960; iylen = 768; } ajGraphicsGetCharsize(&defheight,¤theight); if(!currentheight) { defheight = currentheight = (float) 4.440072; currentheight = defheight * ((float)ixlen/ ((float)(numres)*(currentheight+(float)1.0))) /currentheight; } ajGraphicsSetCharscale(((float)ixlen/((float)(numres)* (currentheight+(float)1.0)))/ currentheight); ajGraphicsGetCharsize(&defheight,¤theight); yincr = (currentheight + (float)3.0)*(float)0.3; if(!title) yy1 = ystart; else yy1 = ystart-(float)5.0; count = 1; residue[1]='\0'; bendfactor = (3*yincr)/maxbend; curvefactor = (3*yincr)/maxcurve; ptr = ajStrGetPtr(sstr); yy1 = yy1-(yincr*((float)5.0)); for(ii=1;ii<=ilen;ii++) { if(count > numres) { yy1 = yy1-(yincr*((float)5.0)); if(yy1<1.0) { if(!title) yy1=ystart; else yy1 = ystart-(float)5.0; yy1 = yy1-(yincr*((float)5.0)); ajGraphNewpage(graph,AJFALSE); } count = 1; } residue[0] = *ptr; ajGraphicsDrawposTextAtend((float)(count)+(float)2.0,yy1,residue); if(ii>1 && ii < ilen) { yp1 = yy1+yincr + (bend[ii]*bendfactor); yp2 = yy1+yincr + (bend[ii+1]*bendfactor); ajGraphicsDrawposLine((float)count+(float)1.5,yp1, (float)(count)+(float)2.5,yp2); } ipos = ilen-(ajint)rcurve-7; if(ipos < 0) ipos = 0; if(ii>rcurve+5 && ii<ipos) { yp1 = yy1+yincr + (curve[ii]*curvefactor); yp2 = yy1+yincr + (curve[ii+1]*curvefactor); ajGraphicsDrawposLine((float)count+(float)1.7,yp1, (float)(count)+(float)2.3,yp2); } ajGraphicsDrawposLine((float)count+(float)1.5,yy1+yincr, (float)(count)+(float)2.5,yy1+yincr); count++; ptr++; } ajGraphicsClose(); } AJFREE(iseq); AJFREE(x); AJFREE(y); AJFREE(xave); AJFREE(yave); AJFREE(curve); AJFREE(bend); ajStrDel(&sstr); ajSeqDel(&seq); ajFileClose(&file); ajFileClose(&outf); ajGraphxyDel(&graph); embExit(); return 0; }
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq, const AjPPatternRegex pat, AjBool reverse) { ajint pos=0; ajint off; ajint len; AjPFeature sf = NULL; AjPStr substr = NULL; AjPStr seqstr = NULL; AjPStr tmpstr = NULL; AjPStr tmp = ajStrNew(); AjPRegexp patexp = ajPatternRegexGetCompiled(pat); ajint adj; AjBool isreversed; AjPSeq revseq; ajint seqlen; seqlen = ajSeqGetLen(seq); if(!seqlen) return; isreversed = ajSeqIsReversedTrue(seq); if(isreversed) seqlen += ajSeqGetOffset(seq); pos = ajSeqGetBeginTrue(seq); adj = ajSeqGetEndTrue(seq); if(!ajStrGetLen(featMotifProt)) ajStrAssignC(&featMotifProt, "SO:0001067"); if(!ajStrGetLen(featMotifNuc)) ajStrAssignC(&featMotifNuc, "SO:0000714"); /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n", pos, adj, reverse, isreversed);*/ /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n", seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq), ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/ if (reverse) { revseq = ajSeqNewSeq(seq); ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1); ajSeqstrReverse(&seqstr); } ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1); ajStrFmtUpper(&seqstr); while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr)) { off = ajRegOffset(patexp); len = ajRegLenI(patexp, 0); if(off || len) { ajRegSubI(patexp, 0, &substr); ajRegPost(patexp, &tmp); ajStrAssignS(&seqstr, substr); ajStrAppendS(&seqstr, tmp); pos += off; /*ajDebug("match pos: %d adj: %d len: %d off:%d\n", pos, adj, len, off);*/ if (reverse) sf = ajFeatNew(ftable, NULL, featMotifNuc, adj - pos - len + 2, adj - pos + 1, 0.0, '-', 0); else { if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable)) sf = ajFeatNewProt(ftable, NULL, featMotifProt, pos, pos + len - 1, 0.0); else sf = ajFeatNew(ftable, NULL, featMotifNuc, pos, pos + len - 1, 0.0, '.', 0); } if(isreversed) ajFeatReverse(sf, seqlen); ajFmtPrintS (&tmpstr,"*pat %S: %S", ajPatternRegexGetName(pat), ajPatternRegexGetPattern(pat)); ajFeatTagAdd (sf,NULL,tmpstr); pos += 1; ajStrCutStart(&seqstr, 1); } else { pos++; ajStrCutStart(&seqstr, 1); } } ajStrDel(&tmpstr); ajStrDel(&tmp); ajStrDel(&substr); ajStrDel(&seqstr); if(reverse) ajSeqDel(&revseq); return; }
int main(int argc, char *argv[]) { embInitPV("gseqinfo", argc, argv, "GEMBASSY", "1.0.1"); struct soap soap; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; AjPStr tmp = NULL; AjPStr parse = NULL; AjPStr numA = NULL; AjPStr numT = NULL; AjPStr numG = NULL; AjPStr numC = NULL; AjPStrTok handle = NULL; ajint n; char *in0; char *result; AjBool show = 0; AjPFile outf = NULL; seqall = ajAcdGetSeqall("sequence"); outf = ajAcdGetOutfile("outfile"); while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); ajStrAssignS(&seqid, ajSeqGetAccS(seq)); in0 = ajCharNewS(inseq); if(soap_call_ns1__seqinfo( &soap, NULL, NULL, in0, &result ) == SOAP_OK) { tmp = ajStrNewC(result); ajStrExchangeCC(&tmp, "<", "\n"); ajStrExchangeCC(&tmp, ">", "\n"); handle = ajStrTokenNewC(tmp, "\n"); while(ajStrTokenNextParse(handle, &parse)) { if(ajStrIsInt(parse)) if(!numA) numA = ajStrNewS(parse); else if(!numT) numT = ajStrNewS(parse); else if(!numG) numG = ajStrNewS(parse); else if(!numC) numC = ajStrNewS(parse); } if(show) ajFmtPrint("Sequence: %S A: %S T: %S G: %S C: %S\n", seqid, numA, numT, numG, numC); else ajFmtPrintF(outf, "Sequence: %S A: %S T: %S G: %S C: %S\n", seqid, numA, numT, numG, numC); } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&seqid); embExit(); return 0; }
AjPSeqout get_orf( AjPSeqout seqout, AjPSeqall seqall, AjPStr tablestr, ajuint minsize, ajuint maxsize, AjPStr findstr, AjBool methionine, AjBool circular, AjBool reverse, ajint around ) { ajint table; ajint find; AjPSeq seq = NULL; AjPTrn trnTable; AjPStr sseq = NULL; /* sequence string */ /* ORF number to append to name of sequence to create unique name */ ajint orf_no; AjBool sense; /* ajTrue = forward sense */ ajint len; /* initialise the translation table */ ajStrToInt(tablestr, &table); trnTable = ajTrnNewI(table); /* what sort of ORF are we looking for */ ajStrToInt(findstr, &find); /* ** get the minimum size converted to protein length if storing ** protein sequences */ if (find == P_STOP2STOP || find == P_START2STOP || find == AROUND_START) { minsize /= 3; maxsize /= 3; } while (ajSeqallNext(seqall, &seq)) { orf_no = 1; /* number of the next ORF */ sense = ajTrue; /* forward sense initially */ /* get the length of the sequence */ len = ajSeqGetLen(seq); /* ** if the sequence is circular, append it to itself to triple its ** length so can deal easily with wrapped ORFs, but don't update ** len */ if (circular) { ajStrAssignS(&sseq, ajSeqGetSeqS(seq)); ajStrAppendS(&sseq, ajSeqGetSeqS(seq)); ajStrAppendS(&sseq, ajSeqGetSeqS(seq)); ajSeqAssignSeqS(seq, sseq); } /* find the ORFs */ getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense, circular, find, &orf_no, methionine, around, &record); /* now reverse complement the sequence and do it again */ if (reverse) { sense = ajFalse; ajSeqReverseForce(seq); getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense, circular, find, &orf_no, methionine, around); } } ajTrnDel(&trnTable); ajSeqDel(&seq); ajStrDel(&sseq); }