int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq seq; ajint i = 0; AjPStr kimout = NULL; AjPStr dir = NULL; AjPFile obofile = NULL; AjPFile resfile = NULL; AjPDir taxdir = NULL; embInit("ajtest", argc, argv); seqall = ajAcdGetSeqall ("sequence"); seqset = ajAcdGetSeqset ("bsequence"); dir = ajAcdGetOutdirName("outdir"); obofile = ajAcdGetInfile ("obofile"); taxdir = ajAcdGetDirectory ("taxdir"); resfile = ajAcdGetInfile ("dbxreffile"); ajUser("Directory '%S'", dir); ajUser("Set of %d", ajSeqsetGetSize(seqset)); while(ajSeqallNext (seqall, &seq)) { ajUser ("%3d <%S>", i++, ajSeqGetUsaS(seq)); ajFmtPrintS(&kimout, "kim%d.out", i); ajtest_kim (kimout, seq); } ajSeqDel(&seq); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajStrDel(&kimout); ajStrDel(&dir); if(taxdir) ajTaxLoad(taxdir); ajDirDel(&taxdir); if(obofile) ajOboParseObofile(obofile, ""); ajFileClose(&obofile); if(resfile) ajResourceParse(resfile, ""); ajFileClose(&resfile); embExit(); return 0; }
static int infoalign_Getrefseq(const AjPStr refseq, const AjPSeqset seqset) { ajint i; const AjPSeq seq; for(i=0; i<(ajint)ajSeqsetGetSize(seqset); i++) { seq = ajSeqsetGetseqSeq(seqset, i); if(!ajStrCmpS(ajSeqGetNameS(seq), refseq)) return i; } /* not a name of a sequence, so it must be a number */ if(!ajStrToInt(refseq, &i)) ajFatal("Reference sequence is not a sequence ID or a number: %S", refseq); if(i < 0 || i > (ajint) ajSeqsetGetSize(seqset)) ajFatal("Reference sequence number < 0 or > number " "of input sequences: %d", i); return i-1; }
/* @funcstatic skipredundant_SeqsetToList **************************************
**
** Appends one node to a list for every sequence of a sequence set.
** Each node is created by embDmxNrseqNew() from the corresponding sequence
** of the set, in set order.
**
** NOTE(review): the earlier description stated that the sequences are not
** copied (only a reference is pushed); that depends on embDmxNrseqNew(),
** which is not visible here - confirm before relying on it.
**
** @param [u] list [AjPList] List the nodes are appended to
** @param [w] seqset [AjPSeqset] Sequence set supplying the sequences
** @return [AjBool] True on success, false if either argument is NULL
******************************************************************************/
static AjBool skipredundant_SeqsetToList (AjPList list, AjPSeqset seqset)
{
    ajint total = 0;
    ajint pos = 0;
    EmbPDmxNrseq node = NULL;

    if(!(list && seqset))
        return ajFalse;

    total = ajSeqsetGetSize(seqset);

    while(pos < total)
    {
        node = embDmxNrseqNew(ajSeqsetGetseqSeq(seqset, pos));
        ajListPushAppend(list, node);
        node = NULL;
        pos++;
    }

    return ajTrue;
}
/* @prog sizeseq **************************************************************
**
** Sorts a sequence set with ajSeqsetSortLen() and writes every sequence to
** the output stream, walking the sorted set backwards when "descending"
** is set and forwards otherwise.
**
** @param [r] argc [int] Number of command line arguments
** @param [r] argv [char**] Command line arguments
** @return [int] Always 0
******************************************************************************/
int main(int argc, char **argv)
{
    AjPSeqset input = NULL;
    AjPSeqout output = NULL;
    AjBool descending;
    ajuint total;
    ajuint pos;

    /* ACD file processing */
    embInit("sizeseq", argc, argv);
    input = ajAcdGetSeqset("sequences");
    descending = ajAcdGetBoolean("descending");
    output = ajAcdGetSeqoutall("outseq");

    /* application logic */
    ajSeqsetSortLen(input);
    total = ajSeqsetGetSize(input);

    if(!descending)
    {
        for(pos = 0; pos < total; pos++)
            ajSeqoutWriteSeq(output, ajSeqsetGetseqSeq(input, pos));
    }
    else
    {
        /* unsigned counter: step down before use so it never wraps */
        pos = total;

        while(pos > 0)
        {
            pos--;
            ajSeqoutWriteSeq(output, ajSeqsetGetseqSeq(input, pos));
        }
    }

    /* memory management and exit */
    ajSeqsetDel(&input);
    ajSeqoutClose(output);
    ajSeqoutDel(&output);

    embExit();

    return 0;
}
int main(int argc, char **argv) { /* Variable Declarations */ AjPSeqset seqset = NULL; AjPMatrixf fmat = NULL; float thresh; float threshlow; float threshup; float gapopen; float gapextend; AjPSeqout seqout = NULL; AjPSeqout seqoutred = NULL; AjPStr mode = NULL; ajint moden; ajuint i; /* toggle "feature" from ACD not retrieved ... no need */ const AjPSeq seq = NULL; AjPList list = NULL; /* List for redundancy removal. */ AjPUint keep = NULL; /* 1: Sequence in list was non-redundant, 0: redundant. */ ajuint nseq = 0; /* No. seqs. in list. */ ajint nseqnr = 0; /* No. non-redundant seqs. in list. */ /* ACD File Processing */ embInit("skipredundant", argc, argv); seqset = ajAcdGetSeqset("sequences"); mode = ajAcdGetListSingle("mode"); fmat = ajAcdGetMatrixf("datafile"); thresh = ajAcdGetFloat("threshold"); threshlow = ajAcdGetFloat("minthreshold"); threshup = ajAcdGetFloat("maxthreshold"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); seqout = ajAcdGetSeqoutall("outseq"); seqoutred = ajAcdGetSeqoutall("redundantoutseq"); /* Application logic */ list = ajListNew(); skipredundant_SeqsetToList(list, seqset); keep = ajUintNew(); ajStrToInt(mode, &moden); if(moden == 1) /* Remove redundancy at a single threshold % sequence similarity */ { if((!embDmxSeqNR(list, &keep, &nseqnr, fmat, gapopen, gapextend, thresh, ajFalse))) ajFatal("embDmxSeqNR unexpected failure!"); } else if (moden == 2) /* 2: Remove redundancy outside a range of acceptable threshold % similarity */ { if((!embDmxSeqNRRange(list, &keep, &nseqnr, fmat, gapopen, gapextend, threshlow, threshup, ajFalse))) ajFatal("embDmxSeqNRRange unexpected failure!"); } else ajFatal("Invalid mode (not 1 or 2) which should never occur (check ACD file!)"); nseq = ajSeqsetGetSize(seqset); for(i=0; i<nseq; i++) { seq = ajSeqsetGetseqSeq(seqset, i); if(ajUintGet(keep, i)) ajSeqoutWriteSeq(seqout, seq); else if(seqoutred) ajSeqoutWriteSeq(seqoutred, seq); } /* Memory management and exit */ 
ajSeqsetDel(&seqset); ajMatrixfDel(&fmat); ajStrDel(&mode); ajSeqoutClose(seqout); ajSeqoutDel(&seqout); if(seqoutred) { ajSeqoutClose(seqoutred); ajSeqoutDel(&seqoutred); } skipredundant_ClearList(list); ajListFree(&list); ajUintDel(&keep); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seq1; AjPSeqset seq2; AjPFile list; ajint n1; ajint n2; ajint *lengths1; ajint *lengths2; ajuint *order1; ajuint *order2; ajint *hits1; ajint *hits2; ajint curr1; ajint curr2; ajint tmp1; ajint tmp2 = 0; ajint i; AjPStr operator; ajint OperatorCode=0; embInit("listor", argc, argv); seq1 = ajAcdGetSeqset("firstsequences"); seq2 = ajAcdGetSeqset("secondsequences"); list = ajAcdGetOutfile("outfile"); operator = ajAcdGetListSingle("operator"); /* get the operator value */ switch(ajStrGetCharFirst(operator)) { case 'O': OperatorCode = L_OR; break; case 'A': OperatorCode = L_AND; break; case 'X': OperatorCode = L_XOR; break; case 'N': OperatorCode = L_NOT; break; default: ajFatal("Invalid operator type: %S", operator); embExitBad(); } /* get the order of seqset 1 by length */ n1 = ajSeqsetGetSize(seq1); /* lengths of seq1 entries */ lengths1 = AJCALLOC0(n1, sizeof(ajint)); /* seq1 entries which match seq2 */ hits1 = AJCALLOC0(n1, sizeof(ajint)); /* seq1 entries in length order */ order1 = AJCALLOC0(n1, sizeof(ajint)); for(i=0; i<n1; i++) { lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i)); order1[i] = i; hits1[i] = -1; } ajSortIntIncI(lengths1, order1, n1); /* get the order of seqset 2 by length */ n2 = ajSeqsetGetSize(seq2); lengths2 = AJCALLOC0(n2, sizeof(ajint)); hits2 = AJCALLOC0(n2, sizeof(ajint)); order2 = AJCALLOC0(n2, sizeof(ajint)); for(i=0; i<n2; i++) { lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i)); order2[i] = i; hits2[i] = -1; } ajSortIntIncI(lengths2, order2, n2); /* ** go down the two sequence sets, by size order, looking for identical **lengths */ curr1 = 0; curr2 = 0; while(curr1 < n1 && curr2 < n2) { if(lengths1[order1[curr1]] < lengths2[order2[curr2]]) /* seq1 is shorter - increment curr1 index */ curr1++; else if(lengths1[order1[curr1]] > lengths2[order2[curr2]]) /* seq2 is shorter - increment curr2 index */ curr2++; else { /* identical lengths - check all seq1/seq2 entries of this len */ 
for(tmp1=curr1; tmp1<n1 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++) for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] == lengths2[order2[curr2]]; tmp2++) /* check to see if the sequences are identical */ if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1, order1[tmp1])), ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2, order2[tmp2])))) { hits1[order1[tmp1]] = order2[tmp2]; hits2[order2[tmp2]] = order1[tmp1]; } curr1 = tmp1; curr2 = tmp2; } } /* output the required entries to the list file */ listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2); AJFREE(lengths1); AJFREE(lengths2); AJFREE(order1); AJFREE(order2); AJFREE(hits1); AJFREE(hits2); ajFileClose(&list); ajStrDel(&operator); ajSeqsetDel(&seq1); ajSeqsetDel(&seq2); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall seq1; AjPSeqset seq2; AjPSeq a; const AjPSeq b; AjPStr m = 0; AjPStr n = 0; AjPFile errorf; AjBool show = ajFalse; ajint lena = 0; ajint lenb = 0; const char *p; const char *q; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; ajint *compass = NULL; float *path = NULL; float gapopen; float gapextend; float score; ajint begina; ajint i; ajuint k; ajint beginb; ajint start1 = 0; ajint start2 = 0; ajint end1 = 0; ajint end2 = 0; ajint width = 0; AjPTable seq1MatchTable = 0; ajint wordlen = 6; ajint oldmax = 0; AjPAlign align = NULL; embInit("supermatcher", argc, argv); matrix = ajAcdGetMatrixf("datafile"); seq1 = ajAcdGetSeqall("asequence"); seq2 = ajAcdGetSeqset("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); wordlen = ajAcdGetInt("wordlen"); align = ajAcdGetAlign("outfile"); errorf = ajAcdGetOutfile("errorfile"); width = ajAcdGetInt("width"); /* not the same as awidth */ gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); embWordLength(wordlen); ajSeqsetTrim(seq2); while(ajSeqallNext(seq1,&a)) { ajSeqTrim(a); begina = 1 + ajSeqGetOffset(a); m = ajStrNewRes(1+ajSeqGetLen(a)); lena = ajSeqGetLen(a); ajDebug("Read '%S'\n", ajSeqGetNameS(a)); if(!embWordGetTable(&seq1MatchTable, a)) /* get table of words */ ajErr("Could not generate table for %s\n", ajSeqGetNameC(a)); for(k=0;k<ajSeqsetGetSize(seq2);k++) { b = ajSeqsetGetseqSeq(seq2, k); lenb = ajSeqGetLen(b); beginb = 1 + ajSeqGetOffset(b); ajDebug("Processing '%S'\n", ajSeqGetNameS(b)); p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); if(!supermatcher_findstartpoints(seq1MatchTable,b,a, &start1, &start2, &end1, &end2)) { ajFmtPrintF(errorf, "No wordmatch start points for " "%s vs %s. 
No alignment\n", ajSeqGetNameC(a),ajSeqGetNameC(b)); continue; } n=ajStrNewRes(1+ajSeqGetLen(b)); ajStrAssignC(&m,""); ajStrAssignC(&n,""); ajDebug("++ %S v %S start:%d %d end:%d %d\n", ajSeqGetNameS(a), ajSeqGetNameS(b), start1, start2, end1, end2); if(end1-start1+1 > oldmax) { oldmax = ((end1-start1)+1); AJRESIZE(path,oldmax*width*sizeof(float)); AJRESIZE(compass,oldmax*width*sizeof(ajint)); ajDebug("++ resize to oldmax: %d\n", oldmax); } for(i=0;i<((end1-start1)+1)*width;i++) path[i] = 0.0; ajDebug("Calling embAlignPathCalcFast " "%d..%d [%d/%d] %d..%d [%d/%d]\n", start1, end1, (end1 - start1 + 1), lena, start2, end2, (end2 - start2 + 1), lenb); score = embAlignPathCalcSWFast(&p[start1],&q[start2], end1-start1+1,end2-start2+1, 0,width, gapopen,gapextend, path,sub,cvt, compass,show); embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,a,b, &m,&n,end1-start1+1,end2-start2+1, 0,width, &start1,&start2); if(!ajAlignFormatShowsSequences(align)) { ajAlignDefineCC(align, ajStrGetPtr(m), ajStrGetPtr(n), ajSeqGetNameC(a), ajSeqGetNameC(b)); ajAlignSetScoreR(align, score); } else { embAlignReportLocal(align, a, b, m,n,start1,start2, gapopen, gapextend, score,matrix, begina, beginb); } ajAlignWrite(align); ajAlignReset(align); ajStrDel(&n); } embWordFreeTable(&seq1MatchTable); /* free table of words */ seq1MatchTable=0; ajStrDel(&m); } if(!ajAlignFormatShowsSequences(align)) { ajMatrixfDel(&matrix); } AJFREE(path); AJFREE(compass); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seq1); ajSeqDel(&a); ajSeqsetDel(&seq2); ajFileClose(&errorf); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset = NULL; AjPStr refseq; /* input name/number of reference sequence */ ajint nrefseq; /* numeric reference sequence */ AjPMatrix matrix; /* scoring matrix structure */ ajint **sub; /* integer scoring matrix */ AjPSeqCvt cvt = 0; /* conversion table for scoring matrix */ float identity; ajint ident; float fplural; AjPStr cons; AjPSeq consensus; const AjPSeq ref; const AjPSeq seq; ajuint i; AjBool html; AjBool doheader; AjBool dousa; AjBool doname; AjBool doseqlength; AjBool doalignlength; AjBool dogaps; AjBool dogapcount; AjBool doidcount; AjBool dosimcount; AjBool dodifcount; AjBool dochange; AjBool dodesc; AjBool dowt; ajint seqlength; ajint alignlength; ajint gaps; ajint gapcount; ajint idcount; ajint simcount; ajint difcount; float change; AjPFile outfile; const AjPStr usa; const AjPStr name; AjPStr altusa; /* default name when the real name is not known */ AjPStr altname; AjPStr xxx = NULL; embInit("infoalign", argc, argv); seqset = ajAcdGetSeqset("sequence"); refseq = ajAcdGetString("refseq"); matrix = ajAcdGetMatrix("matrix"); ajSeqsetFill(seqset); outfile = ajAcdGetOutfile("outfile"); html = ajAcdGetBoolean("html"); doheader = ajAcdGetBoolean("heading"); dousa = ajAcdGetBoolean("usa"); doname = ajAcdGetBoolean("name"); doseqlength = ajAcdGetBoolean("seqlength"); doalignlength = ajAcdGetBoolean("alignlength"); dogaps = ajAcdGetBoolean("gaps"); dogapcount = ajAcdGetBoolean("gapcount"); doidcount = ajAcdGetBoolean("idcount"); dosimcount = ajAcdGetBoolean("simcount"); dodifcount = ajAcdGetBoolean("diffcount"); dochange = ajAcdGetBoolean("change"); dodesc = ajAcdGetBoolean("description"); dowt = ajAcdGetBoolean("weight"); /* consensus parameters */ fplural = ajAcdGetFloat("plurality"); identity = ajAcdGetFloat("identity"); cons = ajStrNew(); consensus = ajSeqNew(); altusa = ajStrNewC("-"); altname = ajStrNewC("-"); /* get conversion table and scoring matrix */ cvt = ajMatrixGetCvt(matrix); sub = 
ajMatrixGetMatrix(matrix); /* get the number of the reference sequence */ nrefseq = infoalign_Getrefseq(refseq, seqset); /* change the % plurality to the fraction of absolute total weight */ fplural = ajSeqsetGetTotweight(seqset) * fplural / 100; /* ** change the % identity to the number of identical sequences at a ** position required for consensus */ ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100; /* get the consensus sequence */ embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset), fplural, 0.0, ident, ajFalse, &cons); ajSeqAssignSeqS(consensus, cons); ajSeqAssignNameS(consensus,(xxx=ajStrNewC("Consensus"))); /* get the reference sequence */ if(nrefseq == -1) ref = consensus; else ref = ajSeqsetGetseqSeq(seqset, nrefseq); /* start the HTML table */ if(html) ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor=" "\"#FFFFF0\">\n"); /* print the header information */ if(doheader) { /* start the HTML table title line and output the Name header */ if(html) ajFmtPrintF(outfile, "<tr>"); else ajFmtPrintF(outfile, "%s", "# "); if(dousa) { if(html) ajFmtPrintF(outfile, "<th>USA</th>"); else ajFmtPrintF(outfile, "%-16s", "USA"); } if(doname) { if(html) ajFmtPrintF(outfile, "<th>Name</th>"); else ajFmtPrintF(outfile, "%-12s", "Name"); } if(doseqlength) { if(html) ajFmtPrintF(outfile, "<th>Sequence Length</th>"); else ajFmtPrintF(outfile, "SeqLen\t"); } if(doalignlength) { if(html) ajFmtPrintF(outfile, "<th>Aligned Length</th>"); else ajFmtPrintF(outfile, "AlignLen\t"); } if(dogaps) { if(html) ajFmtPrintF(outfile, "<th>Gaps</th>"); else ajFmtPrintF(outfile, "Gaps\t"); } if(dogapcount) { if(html) ajFmtPrintF(outfile, "<th>Gap Length</th>"); else ajFmtPrintF(outfile, "GapLen\t"); } if(doidcount) { if(html) ajFmtPrintF(outfile, "<th>Identity</th>"); else ajFmtPrintF(outfile, "Ident\t"); } if(dosimcount) { if(html) ajFmtPrintF(outfile, "<th>Similarity</th>"); else ajFmtPrintF(outfile, "Similar\t"); } if(dodifcount) { if(html) 
ajFmtPrintF(outfile, "<th>Difference</th>"); else ajFmtPrintF(outfile, "Differ\t"); } if(dochange) { if(html) ajFmtPrintF(outfile, "<th>%% Change</th>"); else ajFmtPrintF(outfile, "%% Change\t"); } if(dowt) { if(html) ajFmtPrintF(outfile, "<th>Weight</th>"); else ajFmtPrintF(outfile, "Weight\t"); } if(dodesc) { if(html) ajFmtPrintF(outfile, "<th>Description</th>"); else ajFmtPrintF(outfile, "Description"); } /* end the HTML table title line */ if(html) ajFmtPrintF(outfile, "</tr>\n"); else ajFmtPrintF(outfile, "\n"); } for(i=0; i<ajSeqsetGetSize(seqset); i++) { seq = ajSeqsetGetseqSeq(seqset, i); /* get the usa ('-' if unknown) */ usa = ajSeqGetUsaS(seq); if(ajStrGetLen(usa) == 0) usa = altusa; /* get the name ('-' if unknown) */ name = ajSeqGetNameS(seq); if(ajStrGetLen(name) == 0) name = altname; /* get the stats from the comparison to the reference sequence */ infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength, &gaps, &gapcount, &idcount, &simcount, &difcount, &change); /* start table line */ if(html) ajFmtPrintF(outfile, "<tr>"); if(dousa) infoalign_OutputStr(outfile, usa, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doseqlength || doalignlength || doname), 18); if(doname) infoalign_OutputStr(outfile, name, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doseqlength || doalignlength), 14); if(doseqlength) infoalign_OutputInt(outfile, seqlength, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doalignlength)); if(doalignlength) infoalign_OutputInt(outfile, alignlength, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps)); if(dogaps) infoalign_OutputInt(outfile, gaps, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount)); if(dogapcount) infoalign_OutputInt(outfile, gapcount, html, (dodesc || dowt || 
dochange || dodifcount || dosimcount || doidcount)); if(doidcount) infoalign_OutputInt(outfile, idcount, html, (dodesc || dowt || dochange || dodifcount || dosimcount)); if(dosimcount) infoalign_OutputInt(outfile, simcount, html, (dodesc || dowt || dochange || dodifcount)); if(dodifcount) infoalign_OutputInt(outfile, difcount, html, (dodesc || dowt || dochange)); if(dochange) infoalign_OutputFloat(outfile, change, html, (dodesc || dowt) ); if(dowt) infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,i), html, dodesc); if(dodesc) infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse, NOLIMIT); /* end table line */ if(html) ajFmtPrintF(outfile, "</tr>\n"); else ajFmtPrintF(outfile, "\n"); } /* end the HTML table */ if(html) ajFmtPrintF(outfile, "</table>\n"); ajFileClose(&outfile); /* tidy up */ ajStrDel(&altusa); ajStrDel(&altname); ajStrDel(&xxx); ajSeqDel(&consensus); ajSeqsetDel(&seqset); ajStrDel(&refseq); ajMatrixDel(&matrix); ajStrDel(&cons); embExit(); return 0; }
int main(int argc, char **argv) { ajint i; ajint numseq; ajint j = 0; ajint numres; ajint count; ajint k; ajint kmax; float defheight; float currentscale; AjPStr shade = NULL; AjPFloat pair = NULL; AjPGraph graph = NULL; AjPMatrix cmpmatrix = NULL; AjPSeqCvt cvt = NULL; AjPStr matcodes = NULL; AjBool consensus; AjBool colourbyconsensus; AjBool colourbyresidues; AjBool colourbyshade = AJFALSE; AjBool boxit; AjBool boxcol; AjBool portrait; AjBool collision; ajint identity; AjBool listoptions; ajint alternative; AjPStr altstr = NULL; AjPStr sidentity = NULL; AjPStr ssimilarity = NULL; AjPStr sother = NULL; AjPStr sboxcolval = NULL; AjPStr options = NULL; /* ajint showscore = 0; */ ajint iboxcolval = 0; ajint cidentity = RED; ajint csimilarity = GREEN; ajint cother = BLACK; float fxp; float fyp; float yincr; float y; ajint ixlen; ajint iylen; ajint ixoff; ajint iyoff; char res[2] = " "; float *score = 0; float scoremax = 0; float *identical = NULL; ajint identicalmaxindex; float *matching = NULL; ajint matchingmaxindex; float *colcheck = NULL; ajint **matrix; ajint m1 = 0; ajint m2 = 0; ajint ms = 0; ajint highindex = 0; ajint myindex; ajint *previous = 0; AjBool iscons = ajFalse; ajint currentstate = 0; ajint oldfg = 0; float fold = 0.0; ajint *colmat = 0; ajint *shadecolour = 0; /* float identthresh = 1.5; */ /* float simthresh = 1.0; */ /* float relthresh = 0.5; */ float part = 0.0; const char *cptr; ajint resbreak; float fplural; float ystart; float xmin; float xmax; float xmid; AjPTime ajtime; ajint gapcount = 0; ajint countforgap = 0; ajint boxindex; float max; ajint matsize; ajint seqperpage = 0; ajint startseq; ajint endseq; ajint newILend = 0; ajint newILstart; void *freeptr; ajint itmp; embInit("prettyplot", argc, argv); seqset = ajAcdGetSeqset("sequences"); numres = ajAcdGetInt("residuesperline"); resbreak = ajAcdGetInt("resbreak"); ajSeqsetFill(seqset); /* Pads sequence set with gap characters */ numseq = ajSeqsetGetSize(seqset); graph = 
ajAcdGetGraph("graph"); colourbyconsensus = ajAcdGetBoolean("ccolours"); colourbyresidues = ajAcdGetBoolean("docolour"); shade = ajAcdGetString("shade"); pair = ajAcdGetArray("pair"); identity = ajAcdGetInt("identity"); boxit = ajAcdGetBoolean("box"); ajtime = ajTimeNewTodayFmt("daytime"); ajSeqsetTrim(seqset); /* offset = ajSeqsetGetOffset(seqset); Unused */ ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset)); if(boxit) { AJCNEW(seqboxptr, numseq); for(i=0;i<numseq;i++) AJCNEW(seqboxptr[i], ajSeqsetGetLen(seqset)); } boxcol = ajAcdGetBoolean("boxcol"); sboxcolval = ajAcdGetString("boxuse"); if(boxcol) { iboxcolval = ajGraphicsCheckColourS(sboxcolval); if(iboxcolval == -1) iboxcolval = GREY; } consensus = ajAcdGetBoolean("consensus"); if(consensus) { AJCNEW(constr, ajSeqsetGetLen(seqset)+1); constr[0] = '\0'; } shownames = ajAcdGetBoolean("name"); shownumbers = ajAcdGetBoolean("number"); charlen = ajAcdGetInt("maxnamelen"); fplural = ajAcdGetFloat("plurality"); portrait = ajAcdGetBoolean("portrait"); collision = ajAcdGetBoolean("collision"); listoptions = ajAcdGetBoolean("listoptions"); altstr = ajAcdGetListSingle("alternative"); cmpmatrix = ajAcdGetMatrix("matrixfile"); ajStrToInt(altstr, &alternative); matrix = ajMatrixGetMatrix(cmpmatrix); cvt = ajMatrixGetCvt(cmpmatrix); matsize = ajMatrixGetSize(cmpmatrix); AJCNEW(identical,matsize); AJCNEW(matching,matsize); AJCNEW(colcheck,matsize); numgaps = numres/resbreak; numgaps--; if(portrait) { ajGraphicsSetPortrait(1); ystart = (float) 75.0; } else ystart = (float) 75.0; /* pair is an array of three non-negative floats */ /* identthresh = ajFloatGet(pair,0); Unused */ /* simthresh = ajFloatGet(pair,1); Unused */ /* relthresh = ajFloatGet(pair,2); Unused */ /* ** shade is a formatted 4-character string. Characters BLPW only. ** controlled by a pattern in ACD. 
*/ if(ajStrGetLen(shade)) { AJCNEW(shadecolour,4); cptr = ajStrGetPtr(shade); for(i=0;i<4;i++){ if(cptr[i]== 'B' || cptr[i]== 'b') shadecolour[i] = BLACK; else if(cptr[i]== 'L' || cptr[i]== 'l') shadecolour[i] = BROWN; else if(cptr[i]== 'P' || cptr[i]== 'p') shadecolour[i] = WHEAT; else if(cptr[i]== 'W' || cptr[i]== 'w') shadecolour[i] = WHITE; } colourbyconsensus = colourbyresidues = ajFalse; colourbyshade = ajTrue; } /* ** we can colour by consensus or residue but not both ** if we have to choose, use the consensus */ if(colourbyconsensus && colourbyresidues) colourbyconsensus = AJFALSE; sidentity = ajAcdGetString("cidentity"); ssimilarity = ajAcdGetString("csimilarity"); sother = ajAcdGetString("cother"); if(colourbyconsensus) { cidentity = ajGraphicsCheckColourS(sidentity); if(cidentity == -1) cidentity = RED; csimilarity = ajGraphicsCheckColourS(ssimilarity); if(csimilarity == -1) csimilarity = GREEN; cother = ajGraphicsCheckColourS(sother); if(cother == -1) cother = BLACK; } else if(colourbyresidues) { matcodes = ajMatrixGetCodes(cmpmatrix); if(ajSeqsetIsProt(seqset)) colmat = ajGraphicsBasecolourNewProt(matcodes); else colmat = ajGraphicsBasecolourNewNuc(matcodes); } /* output the options used as the subtitle for the bottom of the graph */ if(listoptions) { ajStrAssignC(&options,""); ajFmtPrintAppS(&options,"-plurality %.1f",fplural); if(collision) ajStrAppendC(&options," -collision"); else ajStrAppendC(&options," -nocollision"); if(boxit) ajStrAppendC(&options," -box"); else ajStrAppendC(&options," -nobox"); if(boxcol) ajStrAppendC(&options," -boxcol"); else ajStrAppendC(&options," -noboxcol"); if(colourbyconsensus) ajStrAppendC(&options," -colbyconsensus"); else if(colourbyresidues) ajStrAppendC(&options," -colbyresidues"); else if(colourbyshade) ajStrAppendC(&options," -colbyshade"); else ajStrAppendC(&options," -nocolour"); if(alternative==2) ajStrAppendC(&options," -alt 2"); else if(alternative==1) ajStrAppendC(&options," -alt 1"); else 
if(alternative==3) ajStrAppendC(&options," -alt 3"); } AJCNEW(seqcolptr, numseq); for(i=0;i<numseq;i++) AJCNEW(seqcolptr[i], ajSeqsetGetLen(seqset)); AJCNEW(seqcharptr, numseq); AJCNEW(seqnames, numseq); AJCNEW(score, numseq); AJCNEW(previous, numseq); AJCNEW(seqcount, numseq); for(i=0;i<numseq;i++) { ajSeqsetFmtUpper(seqset); seqcharptr[i] = ajSeqsetGetseqSeqC(seqset, i); seqnames[i] = 0; ajStrAppendS(&seqnames[i],ajSeqsetGetseqNameS(seqset, i)); ajStrTruncateLen(&seqnames[i],charlen); previous[i] = 0; seqcount[i] = 0; } /* ** user will pass the number of residues to fit a page ** therefore we now need to calculate the size of the chars ** based on this and get the new char width. ** 'charlen' maximum characters for the name (truncated above) */ ajGraphicsGetCharsize(&defheight,¤tscale); xmin = -charlen - (float)2.0; xmax = (float)numres+(float)11.0+(float)(numres/resbreak); xmid = (xmax + xmin)/(float)2.0; ajGraphOpenWin(graph, xmin, xmax, (float)0.0, ystart+(float)1.0); ajGraphGetParamsPage(graph, &fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff); if(portrait) { itmp = ixlen; ixlen = iylen; iylen = itmp; } ajGraphicsGetCharsize(&defheight,¤tscale); ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen+1)* (currentscale * (float) 1.5)))/ currentscale); /* ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen)* (currentscale+(float)1.0)))/ currentscale); */ ajGraphicsGetCharsize(&defheight,¤tscale); yincr = (currentscale + (float)3.0)*(float)0.3; /* ** If we have titles (now the standard graph title and subtitle and footer) ** leave 7 rows of space for them */ y=ystart-(float)7.0; if(ajStrGetLen(options)) { fold = ajGraphicsSetCharscale(1.0); ajGraphicsDrawposTextAtmid(xmid,2.0, ajStrGetPtr(options)); ajGraphicsSetCharscale(fold); } /* if sequences per page not set then calculate it */ if(!seqperpage) { seqperpage = prettyplot_calcseqperpage(yincr,y,consensus); if(seqperpage>numseq) seqperpage=numseq; } count = 0; /* ** for boxes we need to set a foreground 
colour for the box lines ** and save the current foreground colour */ if(boxit && boxcol) oldfg = ajGraphicsSetFgcolour(iboxcolval); /* ** step through each residue position */ kmax = ajSeqsetGetLen(seqset) - 1; for(k=0; k<= kmax; k++) { /* reset column score array */ for(i=0;i<numseq;i++) score[i] = 0.0; /* reset matrix character testing arrays */ for(i=0;i<matsize;i++) { identical[i] = 0.0; matching[i] = 0.0; colcheck[i] = 0.0; } /* generate a score for this residue in each sequence */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2) score[i] += (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset, j); } if(m1) identical[m1] += ajSeqsetGetseqWeight(seqset, i); } /* find the highest score */ highindex = -1; scoremax = INT_MIN; /*ajDebug("Scores at position %d:\n", k);*/ for(i=0;i<numseq;i++) { /*ajDebug(" seq %d: '%c' %f\n",i,seqcharptr[i][k],score[i]);*/ if(score[i] > scoremax) { scoremax = score[i]; highindex = i; } } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(!matching[m1]) { for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2 && matrix[m1][m2] > 0) matching[m1] += ajSeqsetGetseqWeight(seqset, j); } } } /* find highs for matching and identical */ matchingmaxindex = 0; identicalmaxindex = 0; for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] > identical[identicalmaxindex]) identicalmaxindex = m1; } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(matching[m1] > matching[matchingmaxindex]) matchingmaxindex = m1; else if(matching[m1] == matching[matchingmaxindex]) { if(identical[m1] > identical[matchingmaxindex]) matchingmaxindex= m1; } } iscons = ajFalse; boxindex = -1; max = -3; ajDebug("k:%2d highindex:%2d matching:%4.2f\n", k, highindex, matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]); if(highindex != -1 && 
matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural) { iscons = ajTrue; boxindex = highindex; } else { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(matching[m1] > max) { max = matching[m1]; highindex = i; } else if(matching[m1] == max) { if(identical[m1] > identical[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] ) { max = matching[m1]; highindex = i; } } } if(matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural) { iscons = ajTrue; boxindex = highindex; } } if(iscons) { if(!collision) { /* check for collisions */ if(alternative == 1) { /* check to see if this is unique for collisions */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] >= identical[identicalmaxindex] && m1 != identicalmaxindex) iscons = ajFalse; } /*ajDebug("after (alt=1) iscons: %B",iscons);*/ } else if(alternative == 2) { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if((matching[m1] >= matching[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] < 0.1)|| (identical[m1] >= identical[matchingmaxindex] && m1 != matchingmaxindex)) iscons = ajFalse; } } else if(alternative == 3) { /* ** to do this check one is NOT in consensus to see if ** another score of fplural has been found */ ms = ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k]); for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(ms != m1 && colcheck[m1] == 0.0) /* NOT in the current consensus */ for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if( matrix[ms][m2] < 0.1) { /* NOT in the current consensus */ if( matrix[m1][m2] > 0.1) colcheck[m1] += ajSeqsetGetseqWeight(seqset, j); } } } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); /* if any other matches then we have a collision */ if(colcheck[m1] >= fplural) iscons = ajFalse; } /*ajDebug("after alt=2 iscons: %B", iscons);*/ } else { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, 
seqcharptr[i][k]); if((matching[m1] >= matching[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] < 0.1)) iscons = ajFalse; if(identical[m1] >= identical[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] > 0.1) iscons = ajFalse; } if(!iscons) { /* matches failed try identicals */ if(identical[identicalmaxindex] >= fplural) { iscons = ajTrue; /* ** if nothing has an equal or higher match that ** does not match highest then false */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] >= identical[identicalmaxindex] && m1 != identicalmaxindex) iscons = ajFalse; else if(matching[m1] >= matching[identicalmaxindex] && matrix[m1][matchingmaxindex] <= 0.0) iscons = ajFalse; else if(m1 == identicalmaxindex) j = i; } if(iscons) highindex = j; } } } } if(identity) { j = 0; for(i=0;i<numseq;i++) if(seqcharptr[highindex][k] == seqcharptr[i][k]) j++; if(j<identity) iscons = ajFalse; } } /* ** Done a full line of residues ** Boxes have been defined up to this point */ if(count >= numres ) { /* check y position for next set */ y=y-(yincr*((float)numseq+(float)2.0+((float)consensus*(float)2))); if(y<yincr*((float)numseq+(float)2.0+((float)consensus*(float)2))) { /* full page - print it */ y=ystart-(float)6.0; startseq = 0; endseq = seqperpage; newILstart = newILend; newILend = k; while(startseq < numseq) { /* AJB */ /*if(startseq != 0) ajGraphNewpage(graph, AJFALSE);*/ /*ajDebug("Inner loop: startseq: %d numseq: %d endseq: %d\n", startseq, numseq, endseq);*/ if(endseq>numseq) endseq=numseq; prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset), startseq,endseq, newILstart,newILend, numres,resbreak, boxit,boxcol,consensus, ystart,yincr,cvt); startseq = endseq; endseq += seqperpage; ajGraphNewpage(graph, AJFALSE); } } count = 0; gapcount = 0; } count++; countforgap++; for(j=0;j<numseq;j++) { /* START OF BOXES */ if(boxit) { seqboxptr[j][k] = 0; if(boxindex!=-1) { myindex = boxindex; 
if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0) part = 1.0; else { if(identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >= fplural) part = 1.0; else part = 0.0; } if(previous[j] != part) /* draw vertical line */ seqboxptr[j][k] |= BOXLEF; if(j==0) { /* special case for horizontal line */ if(part) { currentstate = 1; /* draw hori line */ seqboxptr[j][k] |= BOXTOP; } else currentstate = 0; } else { /* j != 0 Normal case for horizontal line */ if(part != currentstate) { /*draw hori line */ seqboxptr[j][k] |= BOXTOP; currentstate = (ajint) part; } } if(j== numseq-1 && currentstate) /* draw horiline at bottom */ seqboxptr[j][k] |= BOXBOT; previous[j] = (ajint) part; } else { part = 0; if(previous[j]) { /* draw vertical line */ seqboxptr[j][k] |= BOXLEF; } previous[j] = 0; } if(count == numres || k == kmax || countforgap >= resbreak ) { /* last one on the row or a break*/ if(previous[j]) { /* draw vertical line */ seqboxptr[j][k] |= BOXRIG; } previous[j] = 0; } } /* end box */ if(boxit && boxcol) if(boxindex != -1) { myindex = boxindex; if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0 || identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >= fplural ) seqboxptr[j][k] |= BOXCOLOURED; } /* END OF BOXES */ if(ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])) res[0] = seqcharptr[j][k]; else res[0] = '-'; if(colourbyconsensus) { part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]; if(iscons && seqcharptr[highindex][k] == seqcharptr[j][k]) seqcolptr[j][k] = cidentity; else if(part > 0.0) seqcolptr[j][k] = csimilarity; else seqcolptr[j][k] = cother; } else if(colourbyresidues) seqcolptr[j][k] = colmat[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]; else if(iscons && colourbyshade) { part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]; if(part >= 1.5) 
seqcolptr[j][k] = shadecolour[0]; else if(part >= 1.0) seqcolptr[j][k] = shadecolour[1]; else if(part >= 0.5) seqcolptr[j][k] = shadecolour[2]; else seqcolptr[j][k] = shadecolour[3]; } else if(colourbyshade) seqcolptr[j][k] = shadecolour[3]; else seqcolptr[j][k] = BLACK; } if(consensus) { if(iscons) res[0] = seqcharptr[highindex][k]; else res[0] = '-'; strcat(constr,res); } if(countforgap >= resbreak) { gapcount++; countforgap=0; } } startseq = 0; endseq=seqperpage; newILstart = newILend; newILend = k; while(startseq < numseq) { if(startseq) ajGraphNewpage(graph, AJFALSE); /*ajDebug("Final loop: startseq: %d numseq: %d endseq: %d\n", startseq, numseq, endseq);*/ if(endseq>numseq) endseq = numseq; prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset), startseq,endseq, newILstart,newILend, numres,resbreak, boxit,boxcol,consensus, ystart,yincr,cvt); startseq = endseq; endseq += seqperpage; } ajGraphicsGetCharsize(&defheight,¤tscale); if(boxit && boxcol) oldfg = ajGraphicsSetFgcolour(oldfg); ajGraphicsCloseWin(); ajGraphxyDel(&graph); ajStrDel(&sidentity); ajStrDel(&ssimilarity); ajStrDel(&sother); ajStrDel(&options); ajStrDel(&altstr); ajStrDel(&matcodes); for(i=0;i<numseq;i++) { ajStrDel(&seqnames[i]); AJFREE(seqcolptr[i]); if(seqboxptr) AJFREE(seqboxptr[i]); } AJFREE(seqcolptr); AJFREE(seqboxptr); AJFREE(seqnames); AJFREE(score); AJFREE(previous); AJFREE(seqcount); AJFREE(colmat); AJFREE(shadecolour); freeptr = (void *) seqcharptr; AJFREE(freeptr); AJFREE(identical); AJFREE(matching); AJFREE(colcheck); ajSeqsetDel(&seqset); ajMatrixDel(&cmpmatrix); ajStrDel(&shade); ajStrDel(&sboxcolval); ajStrDel(&sidentity); ajStrDel(&ssimilarity); ajStrDel(&sother); ajFloatDel(&pair); ajTimeDel(&ajtime); AJFREE(constr); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset; const AjPSeq seq1; const AjPSeq seq2; ajint wordlen; AjPTable seq1MatchTable = NULL; AjPList matchlist ; AjPGraph graph = 0; ajuint i; ajuint j; float total=0; ajuint acceptableticks[]= { 1,10,50,100,200,500,1000,1500,10000,50000, 100000,500000,1000000,5000000 }; ajint numbofticks = 10; ajint gap,tickgap; AjBool boxit = AJTRUE; AjBool dumpfeat = AJFALSE; float xmargin; float ymargin; float k; char ptr[10]; float ticklen; float onefifth; AjPFeattable *tabptr = NULL; AjPFeattabOut seq1out = NULL; AjPStr sajb = NULL; float flen1; float flen2; ajuint tui; embInit("polydot", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("sequences"); graph = ajAcdGetGraph("graph"); gap = ajAcdGetInt("gap"); boxit = ajAcdGetBoolean("boxit"); seq1out = ajAcdGetFeatout("outfeat"); dumpfeat = ajAcdGetToggle("dumpfeat"); sajb = ajStrNew(); embWordLength(wordlen); AJCNEW(lines,ajSeqsetGetSize(seqset)); AJCNEW(pts,ajSeqsetGetSize(seqset)); AJCNEW(tabptr,ajSeqsetGetSize(seqset)); for(i=0;i<ajSeqsetGetSize(seqset);i++) { seq1 = ajSeqsetGetseqSeq(seqset, i); total += ajSeqGetLen(seq1); } total +=(float)(gap*(ajSeqsetGetSize(seqset)-1)); xmargin = total*(float)0.15; ymargin = total*(float)0.15; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; i = 0; while(acceptableticks[i]*numbofticks < ajSeqsetGetLen(seqset)) i++; if(i<=13) tickgap = acceptableticks[i]; else tickgap = acceptableticks[13]; ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset)); ajGraphOpenWin(graph, (float)0.0-xmargin,(total+xmargin)*(float)1.35, (float)0.0-ymargin, total+ymargin); ajGraphicsSetCharscale((float)0.3); for(i=0;i<ajSeqsetGetSize(seqset);i++) { which = i; seq1 = ajSeqsetGetseqSeq(seqset, i); tui = ajSeqGetLen(seq1); flen1 = (float) tui; if(embWordGetTable(&seq1MatchTable, seq1)){ /* get table of words */ for(j=0;j<ajSeqsetGetSize(seqset);j++) { seq2 = ajSeqsetGetseqSeq(seqset, j); tui = ajSeqGetLen(seq2); flen2 = (float) tui; 
if(boxit) ajGraphicsDrawposRect(xstart,ystart, xstart+flen1, ystart+flen2); matchlist = embWordBuildMatchTable(seq1MatchTable, seq2, ajTrue); if(matchlist) polydot_plotMatches(matchlist); if(i<j && dumpfeat) embWordMatchListConvToFeat(matchlist,&tabptr[i], &tabptr[j],seq1, seq2); if(matchlist) /* free the match structures */ embWordMatchListDelete(&matchlist); if(j==0) { for(k=0.0;k<ajSeqGetLen(seq1);k+=tickgap) { ajGraphicsDrawposLine(xstart+k,ystart,xstart+k, ystart-ticklen); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtmid(xstart+k, ystart-(onefifth), ptr); } ajGraphicsDrawposTextAtmid( xstart+(flen1/(float)2.0), ystart-(3*onefifth), ajStrGetPtr(ajSeqsetGetseqNameS(seqset, i))); } if(i==0) { for(k=0.0;k<ajSeqGetLen(seq2);k+=tickgap) { ajGraphicsDrawposLine(xstart,ystart+k,xstart-ticklen, ystart+k); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtend(xstart-(onefifth), ystart+k, ptr); } ajGraphicsDrawposTextAtlineJustify( xstart-(3*onefifth), ystart+(flen2/(float)2.0), xstart-(3*onefifth),ystart+flen2, ajStrGetPtr(ajSeqsetGetseqNameS(seqset, j)),0.5); } ystart += flen2+(float)gap; } } embWordFreeTable(&seq1MatchTable); seq1MatchTable = NULL; xstart += flen1+(float)gap; ystart = 0.0; } ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth), "No. Length Lines Points Sequence"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { seq1 = ajSeqsetGetseqSeq(seqset, i); ajFmtPrintS(&sajb,"%3u %6d %5d %6d %s",i+1, ajSeqGetLen(seq1),lines[i], pts[i],ajSeqGetNameC(seq1)); ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth*(i+2)), ajStrGetPtr(sajb)); } if(dumpfeat && seq1out) { for(i=0;i<ajSeqsetGetSize(seqset);i++) { ajFeattableWrite(seq1out, tabptr[i]); ajFeattableDel(&tabptr[i]); } } ajGraphicsClose(); ajGraphxyDel(&graph); ajStrDel(&sajb); AJFREE(lines); AJFREE(pts); AJFREE(tabptr); ajSeqsetDel(&seqset); ajFeattabOutDel(&seq1out);; embExit(); return 0; }
int main(int argc, char *argv[]) { char *string; char *structure=NULL; char *cstruc=NULL; char *ns_bases=NULL; char *c; int n_seq; int i; int length; int sym; int endgaps = 0; int mis = 0; double min_en; double real_en; double sfact = 1.07; int pf = 0; int istty; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ AjPSeqset seq = NULL; AjPFile confile = NULL; AjPFile alifile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfile = NULL; AjPStr constring = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; AjPSeq tseq = NULL; AjPStr tname = NULL; int circ = 0; int doAlnPS = 0; int doColor = 0; embInitPV("vrnaalifoldpf",argc,argv,"VIENNA",VERSION); constring = ajStrNew(); seq = ajAcdGetSeqset("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); mis = !!ajAcdGetBoolean("most"); endgaps = !!ajAcdGetBoolean("endgaps"); nc_fact = (double) ajAcdGetFloat("nspenalty"); cv_fact = (double) ajAcdGetFloat("covariance"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); alifile = ajAcdGetOutfile("alignoutfile"); circ = !!ajAcdGetBoolean("circular"); doColor = !!ajAcdGetBoolean("colour"); dotfile = ajAcdGetOutfile("dotoutfile"); do_backtrack = 1; pf = 1; string = NULL; istty = 0; dangles = 2; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? 
MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(alifile) doAlnPS = 1; if(confile) vienna_GetConstraints(confile,&constring); n_seq = ajSeqsetGetSize(seq); if(n_seq > MAX_NUM_NAMES - 1) ajFatal("[e]RNAalifold is restricted to %d sequences\n", MAX_NUM_NAMES - 1); if (n_seq==0) ajFatal("No sequences found"); for(i=0;i<n_seq;++i) { tseq = (AjPSeq) ajSeqsetGetseqSeq(seq,i); ajSeqGapStandard(tseq, '-'); tname = (AjPStr) ajSeqsetGetseqNameS(seq,i); len = ajSeqGetLen(tseq); AS[i] = (char *) space(len+1); names[i] = (char *) space(ajStrGetLen(tname)+1); strcpy(AS[i],ajSeqGetSeqC(tseq)); strcpy(names[i],ajStrGetPtr(tname)); } AS[n_seq] = NULL; names[n_seq] = NULL; if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); length = (int) strlen(AS[0]); structure = (char *) space((unsigned) length+1); if(confile) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } if (circ && noLonelyPairs) ajWarn( "warning, depending on the origin of the circular sequence, " "some structures may be missed when using -noLP\n" "Try rotating your sequence a few times\n"); if (circ) min_en = circalifold((const char **)AS, structure); else min_en = alifold(AS, structure); { int i; double s=0; extern int eos_debug; eos_debug=-1; /* shut off warnings about nonstandard pairs */ for (i=0; AS[i]!=NULL; i++) if (circ) s += 
energy_of_circ_struct(AS[i], structure); else s += energy_of_struct(AS[i], structure); real_en = s/i; } string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS); ajFmtPrintF(outf,"%s\n%s", string, structure); ajFmtPrintF(outf," (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); if (length<=2500) { char **A; A = annote(structure, (const char**) AS); if (doColor) (void) PS_rna_plot_a(string, structure, essfile, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, essfile, NULL, A[1]); free(A[0]); free(A[1]);free(A); } else ajWarn("INFO: structure too long, not doing xy_plot\n"); if (doAlnPS) PS_color_aln(structure, alifile, AS, names); { /* free mfe arrays but preserve base_pair for PS_dot_plot */ struct bond *bp; bp = base_pair; base_pair = space(16); free_alifold_arrays(); /* free's base_pair */ free_alipf_arrays(); base_pair = bp; } if (pf) { double energy, kT; pair_info *pi; char * mfe_struc; mfe_struc = strdup(structure); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); /* init_alipf_fold(length); */ if (confile) strncpy(structure, ajStrGetPtr(constring), length+1); energy = (circ) ? alipf_circ_fold(AS, structure, &pi) : alipf_fold(AS, structure, &pi); if (do_backtrack) { ajFmtPrintF(outf,"%s", structure); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); ajFmtPrintF(outf," frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; short *ptable; int k; ptable = make_pair_table(mfe_struc); ajFmtPrintF(outf,"\n# Alignment section\n\n"); aliout = ajFileGetFileptr(outf); fprintf(aliout, "%d sequences; length of alignment %d\n", n_seq, length); fprintf(aliout, "alifold output\n"); for (k=0; pi[k].i>0; k++) { pi[k].comp = (ptable[pi[k].i] == pi[k].j) ? 
1:0; print_pi(pi[k], aliout); } fprintf(aliout, "%s\n", structure); free(ptable); cp = make_color_pinfo(pi); (void) PS_color_dot_plot(string, cp, dotfile); free(cp); free(mfe_struc); free(pi); } } if (cstruc!=NULL) free(cstruc); free(base_pair); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } ajSeqsetDel(&seq); ajStrDel(&constring); ajStrDel(&eenergy); ajStrDel(&edangles); ajStrDel(&ensbases); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); ajFileClose(&alifile); ajFileClose(&dotfile); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall queryseqs; AjPSeqset targetseqs; AjPSeq queryseq; const AjPSeq targetseq; AjPStr queryaln = 0; AjPStr targetaln = 0; AjPFile errorf; AjBool show = ajFalse; const char *queryseqc; const char *targetseqc; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; ajint *compass = NULL; float *path = NULL; float gapopen; float gapextend; float score; float minscore; ajuint j, k; ajint querystart = 0; ajint targetstart = 0; ajint queryend = 0; ajint targetend = 0; ajint width = 0; AjPTable kmers = 0; ajint wordlen = 6; ajint oldmax = 0; ajint newmax = 0; ajuint ntargetseqs; ajuint nkmers; AjPAlign align = NULL; EmbPWordMatch maxmatch; /* match with maximum score */ /* Cursors for the current sequence being scanned, ** i.e., until which location it was scanned. ** Separate cursor/location entries for each sequence in the seqset. */ ajuint* lastlocation; EmbPWordRK* wordsw = NULL; AjPList* matchlist = NULL; embInit("supermatcher", argc, argv); matrix = ajAcdGetMatrixf("datafile"); queryseqs = ajAcdGetSeqall("asequence"); targetseqs= ajAcdGetSeqset("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); wordlen = ajAcdGetInt("wordlen"); align = ajAcdGetAlign("outfile"); errorf = ajAcdGetOutfile("errorfile"); width = ajAcdGetInt("width"); /* width for banded Smith-Waterman */ minscore = ajAcdGetFloat("minscore"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); embWordLength(wordlen); /* seqset sequence is the reference sequence for SAM format */ ajAlignSetRefSeqIndx(align, 1); ajSeqsetTrim(targetseqs); ntargetseqs = ajSeqsetGetSize(targetseqs); AJCNEW0(matchlist, ntargetseqs); /* get tables of words */ for(k=0;k<ntargetseqs;k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); embWordGetTable(&kmers, targetseq); ajDebug("Number of distinct kmers found so far: %d\n", ajTableGetLength(kmers)); } 
AJCNEW0(lastlocation, ntargetseqs); if(ajTableGetLength(kmers)<1) ajErr("no kmers found"); nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs); while(ajSeqallNext(queryseqs,&queryseq)) { ajSeqTrim(queryseq); queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq)); ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq)); for(k=0;k<ntargetseqs;k++) { lastlocation[k]=0; matchlist[k] = ajListstrNew(); } embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs, (const EmbPWordRK*)wordsw, wordlen, nkmers, matchlist, lastlocation, ajFalse); for(k=0;k<ajSeqsetGetSize(targetseqs);k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq)); if(ajListGetLength(matchlist[k])==0) { ajFmtPrintF(errorf, "No wordmatch start points for " "%s vs %s. No alignment\n", ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq)); embWordMatchListDelete(&matchlist[k]); continue; } /* only the maximum match is used as seed * (if there is more than one location with the maximum match * only the first one is used) * TODO: we should add a new option to make above limit optional */ maxmatch = embWordMatchFirstMax(matchlist[k]); supermatcher_findendpoints(maxmatch,targetseq, queryseq, &targetstart, &querystart, &targetend, &queryend); targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq)); queryseqc = ajSeqGetSeqC(queryseq); targetseqc = ajSeqGetSeqC(targetseq); ajStrAssignC(&queryaln,""); ajStrAssignC(&targetaln,""); ajDebug("++ %S v %S start:%d %d end:%d %d\n", ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq), targetstart, querystart, targetend, queryend); newmax = (targetend-targetstart+2)*width; if(newmax > oldmax) { AJCRESIZE0(path,oldmax,newmax); AJCRESIZE0(compass,oldmax,newmax); oldmax=newmax; ajDebug("++ memory re/allocation for path/compass arrays" " to size: %d\n", newmax); } else { AJCSET0(path,newmax); AJCSET0(compass,newmax); } ajDebug("Calling embAlignPathCalcSWFast " "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n", querystart, queryend, (queryend - 
querystart + 1), ajSeqGetLen(queryseq), targetstart, targetend, (targetend - targetstart + 1), ajSeqGetLen(targetseq), width); score = embAlignPathCalcSWFast(&targetseqc[targetstart], &queryseqc[querystart], targetend-targetstart+1, queryend-querystart+1, 0,width, gapopen,gapextend, path,sub,cvt, compass,show); if(score>minscore) { embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend, targetseq,queryseq, &targetaln,&queryaln, targetend-targetstart+1, queryend-querystart+1, 0,width, &targetstart,&querystart); if(!ajAlignFormatShowsSequences(align)) { ajAlignDefineCC(align, ajStrGetPtr(targetaln), ajStrGetPtr(queryaln), ajSeqGetNameC(targetseq), ajSeqGetNameC(queryseq)); ajAlignSetScoreR(align, score); } else { ajDebug(" queryaln:%S \ntargetaln:%S\n", queryaln,targetaln); embAlignReportLocal(align, queryseq, targetseq, queryaln, targetaln, querystart, targetstart, gapopen, gapextend, score, matrix, 1 + ajSeqGetOffset(queryseq), 1 + ajSeqGetOffset(targetseq) ); } ajAlignWrite(align); ajAlignReset(align); } ajStrDel(&targetaln); embWordMatchListDelete(&matchlist[k]); } ajStrDel(&queryaln); } for(k=0;k<nkmers;k++) { AJFREE(wordsw[k]->seqindxs); AJFREE(wordsw[k]->nSeqMatches); for(j=0;j<wordsw[k]->nseqs;j++) AJFREE(wordsw[k]->locs[j]); AJFREE(wordsw[k]->nnseqlocs); AJFREE(wordsw[k]->locs); AJFREE(wordsw[k]); } embWordFreeTable(&kmers); if(!ajAlignFormatShowsSequences(align)) ajMatrixfDel(&matrix); AJFREE(path); AJFREE(compass); AJFREE(kmers); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&queryseqs); ajSeqDel(&queryseq); ajSeqsetDel(&targetseqs); ajFileClose(&errorf); embExit(); return 0; }
/* @func emboss_copy **********************************************************
**
** Copies an EMBOSS sequence set into HMMER-style alignment data: an array
** of C strings holding the aligned sequences, and an AINFO structure
** holding per-sequence information (name, id, accession, weight, type,
** ungapped length) plus alignment-wide fields taken from the first
** sequence.
**
** The set is padded to a common length with ajSeqsetFill and converted to
** upper case before copying, so seqset itself is modified.
**
** @param [u] seqset [AjPSeqset] Sequence set (filled and upper-cased)
** @param [w] retseqs [char ***] Receives the newly allocated sequence array
** @param [w] info [AINFO *] Alignment information to fill in
** @return [void]
******************************************************************************/
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    ajint i = 0;
    ajint cnt = 0;
    char **seqs;
    const AjPSeq seq = NULL;
    const char *p = NULL;
    char c = '\0';
    AjPStr tmpstr = NULL;

    info->name  = NULL;
    info->rf    = NULL;
    info->cs    = NULL;
    info->desc  = NULL;
    info->acc   = NULL;
    info->au    = NULL;
    info->flags = 0;

    ajSeqsetFill(seqset);

    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);
    maxlen = ajSeqsetGetLen(seqset);

    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    info->sqinfo = calloc((size_t) n, sizeof *info->sqinfo);
    if(!info->sqinfo && n > 0)
        ajFatal("emboss_copy: out of memory allocating sequence info");

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len   = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop  = 0;
        info->sqinfo[i].olen  = 0;
        info->sqinfo[i].type  = 0;
        info->sqinfo[i].ss    = NULL;
        info->sqinfo[i].sa    = NULL;
    }

    AJCNEW0(info->wgt,n);
    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }

    info->nseq = n;
    info->alen = maxlen;

    /*
    ** Name/id and accession, truncated to fit the fixed-size fields.
    ** ajStrAssignSubS takes an inclusive end position, so copying len
    ** characters means ending at len-1.  NOTE(review): assumes the
    ** name/id/acc fields are SQINFO_NAMELEN bytes - confirm in the
    ** SQINFO declaration.
    */
    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);

        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len >= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len-1);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }

        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len >= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len-1);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }

    /* Alignment-wide fields come from the first sequence, if there is
    ** one; for an empty set they stay NULL. */
    if(n > 0)
    {
        seq = ajSeqsetGetseqSeq(seqset,0);
        info->cs   = ajCharNewS(ajSeqGetSeqS(seq));
        info->name = ajCharNewS(ajSeqGetNameS(seq));
        info->acc  = ajCharNewS(ajSeqGetAccS(seq));
        info->desc = ajCharNewS(ajSeqGetDescS(seq));
        info->rf   = ajCharNewS(ajSeqGetSeqS(seq));
    }

    /* Sequence type and ungapped length */
    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        /* count residues, skipping the gap characters . space _ - ~ */
        seq = ajSeqsetGetseqSeq(seqset,i);
        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }

    *retseqs = seqs;

    ajStrDel(&tmpstr);

    return;
}
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: 
%u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all 
matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. 
*/ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. 
*/ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. 
*/ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }