int main(int argc, char **argv) { AjPSeqall nucseq; /* input nucleic sequences */ AjPSeqset protseq; /* input aligned protein sequences */ AjPSeqout seqout; AjPSeq nseq; /* next nucleic sequence to align */ const AjPSeq pseq; /* next protein sequence use in alignment */ AjPTrn trnTable; AjPSeq pep; /* translation of nseq */ AjPStr tablelist; ajint table; AjPSeqset outseqset; /* set of aligned nucleic sequences */ ajint proteinseqcount = 0; AjPStr degapstr = NULL; /* used to check if it matches with START removed */ AjPStr degapstr2 = NULL; AjPStr codon = NULL; /* holds temporary codon to check if is START */ char aa; /* translated putative START codon */ ajint type; /* returned type of the putative START codon */ /* start position of guide protein in translation */ ajlong pos = 0; AjPSeq newseq = NULL; /* output aligned nucleic sequence */ ajint frame; embInit("tranalign", argc, argv); nucseq = ajAcdGetSeqall("asequence"); protseq = ajAcdGetSeqset("bsequence"); tablelist = ajAcdGetListSingle("table"); seqout = ajAcdGetSeqoutset("outseq"); outseqset = ajSeqsetNew(); degapstr = ajStrNew(); /* initialise the translation table */ ajStrToInt(tablelist, &table); trnTable = ajTrnNewI(table); ajSeqsetFill(protseq); while(ajSeqallNext(nucseq, &nseq)) { if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL) ajErr("No guide protein sequence available for " "nucleic sequence %S", ajSeqGetNameS(nseq)); ajDebug("Aligning %S and %S\n", ajSeqGetNameS(nseq), ajSeqGetNameS(pseq)); /* get copy of pseq string with no gaps */ ajStrAssignS(°apstr, ajSeqGetSeqS(pseq)); ajStrRemoveGap(°apstr); /* ** for each translation frame look for subset of pep that ** matches pseq */ for(frame = 1; frame <4; frame++) { ajDebug("trying frame %d\n", frame); pep = ajTrnSeqOrig(trnTable, nseq, frame); degapstr2 = ajStrNew(); ajStrAssignRef(°apstr2, degapstr); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr); /* ** we might have a START codon that should be translated as 'M' ** we need to check if there is a match after a possible START ** codon */ if(pos == -1 && ajStrGetLen(degapstr) > 1 && (ajStrGetPtr(degapstr)[0] == 'M' || ajStrGetPtr(degapstr)[0] == 'm')) { /* see if pep minus the first character is a match */ ajStrCutStart(°apstr2, 1); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); /* ** pos is >= 1 if we have a match that is after the first ** residue */ if(pos >= 1) { /* point back at the putative START Methionine */ pos--; /* test if first codon is a START */ codon = ajStrNew(); ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), (pos*3)+frame-1, (pos*3)+frame+2); type = ajTrnCodonstrTypeS(trnTable, codon, &aa); if(type != 1) { /* first codon is not a valid START, force a mismatch */ pos = -1; } ajStrDel(&codon); } else { /* force 'pos == 0' to be treated as a mismatch */ pos = -1; } } ajStrDel(°apstr2); ajSeqDel(&pep); if(pos != -1) break; } if(pos == -1) ajErr("Guide protein sequence %S not found in nucleic sequence %S", ajSeqGetNameS(pseq), ajSeqGetNameS(nseq)); else { ajDebug("got a match with frame=%d\n", frame); /* extract the coding region of nseq with gaps */ newseq = ajSeqNew(); ajSeqSetNuc(newseq); ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq)); ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq)); tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1); /* output the gapped nucleic sequence */ ajSeqsetApp(outseqset, newseq); ajSeqDel(&newseq); } ajStrRemoveWhiteExcess(°apstr); } ajSeqoutWriteSet(seqout, outseqset); ajSeqoutClose(seqout); ajTrnDel(&trnTable); ajSeqsetDel(&outseqset); ajStrDel(°apstr); ajStrDel(°apstr2); ajSeqallDel(&nucseq); ajSeqDel(&nseq); ajSeqoutDel(&seqout); ajSeqsetDel(&protseq); ajStrDel(&tablelist); embExit(); return 0; }
int main(int argc, char **argv) { ajint i; ajint numseq; ajint j = 0; ajint numres; ajint count; ajint k; ajint kmax; float defheight; float currentscale; AjPStr shade = NULL; AjPFloat pair = NULL; AjPGraph graph = NULL; AjPMatrix cmpmatrix = NULL; AjPSeqCvt cvt = NULL; AjPStr matcodes = NULL; AjBool consensus; AjBool colourbyconsensus; AjBool colourbyresidues; AjBool colourbyshade = AJFALSE; AjBool boxit; AjBool boxcol; AjBool portrait; AjBool collision; ajint identity; AjBool listoptions; ajint alternative; AjPStr altstr = NULL; AjPStr sidentity = NULL; AjPStr ssimilarity = NULL; AjPStr sother = NULL; AjPStr sboxcolval = NULL; AjPStr options = NULL; /* ajint showscore = 0; */ ajint iboxcolval = 0; ajint cidentity = RED; ajint csimilarity = GREEN; ajint cother = BLACK; float fxp; float fyp; float yincr; float y; ajint ixlen; ajint iylen; ajint ixoff; ajint iyoff; char res[2] = " "; float *score = 0; float scoremax = 0; float *identical = NULL; ajint identicalmaxindex; float *matching = NULL; ajint matchingmaxindex; float *colcheck = NULL; ajint **matrix; ajint m1 = 0; ajint m2 = 0; ajint ms = 0; ajint highindex = 0; ajint myindex; ajint *previous = 0; AjBool iscons = ajFalse; ajint currentstate = 0; ajint oldfg = 0; float fold = 0.0; ajint *colmat = 0; ajint *shadecolour = 0; /* float identthresh = 1.5; */ /* float simthresh = 1.0; */ /* float relthresh = 0.5; */ float part = 0.0; const char *cptr; ajint resbreak; float fplural; float ystart; float xmin; float xmax; float xmid; AjPTime ajtime; ajint gapcount = 0; ajint countforgap = 0; ajint boxindex; float max; ajint matsize; ajint seqperpage = 0; ajint startseq; ajint endseq; ajint newILend = 0; ajint newILstart; void *freeptr; ajint itmp; embInit("prettyplot", argc, argv); seqset = ajAcdGetSeqset("sequences"); numres = ajAcdGetInt("residuesperline"); resbreak = ajAcdGetInt("resbreak"); ajSeqsetFill(seqset); /* Pads sequence set with gap characters */ numseq = ajSeqsetGetSize(seqset); graph = ajAcdGetGraph("graph"); colourbyconsensus = ajAcdGetBoolean("ccolours"); colourbyresidues = ajAcdGetBoolean("docolour"); shade = ajAcdGetString("shade"); pair = ajAcdGetArray("pair"); identity = ajAcdGetInt("identity"); boxit = ajAcdGetBoolean("box"); ajtime = ajTimeNewTodayFmt("daytime"); ajSeqsetTrim(seqset); /* offset = ajSeqsetGetOffset(seqset); Unused */ ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset)); if(boxit) { AJCNEW(seqboxptr, numseq); for(i=0;i<numseq;i++) AJCNEW(seqboxptr[i], ajSeqsetGetLen(seqset)); } boxcol = ajAcdGetBoolean("boxcol"); sboxcolval = ajAcdGetString("boxuse"); if(boxcol) { iboxcolval = ajGraphicsCheckColourS(sboxcolval); if(iboxcolval == -1) iboxcolval = GREY; } consensus = ajAcdGetBoolean("consensus"); if(consensus) { AJCNEW(constr, ajSeqsetGetLen(seqset)+1); constr[0] = '\0'; } shownames = ajAcdGetBoolean("name"); shownumbers = ajAcdGetBoolean("number"); charlen = ajAcdGetInt("maxnamelen"); fplural = ajAcdGetFloat("plurality"); portrait = ajAcdGetBoolean("portrait"); collision = ajAcdGetBoolean("collision"); listoptions = ajAcdGetBoolean("listoptions"); altstr = ajAcdGetListSingle("alternative"); cmpmatrix = ajAcdGetMatrix("matrixfile"); ajStrToInt(altstr, &alternative); matrix = ajMatrixGetMatrix(cmpmatrix); cvt = ajMatrixGetCvt(cmpmatrix); matsize = ajMatrixGetSize(cmpmatrix); AJCNEW(identical,matsize); AJCNEW(matching,matsize); AJCNEW(colcheck,matsize); numgaps = numres/resbreak; numgaps--; if(portrait) { ajGraphicsSetPortrait(1); ystart = (float) 75.0; } else ystart = (float) 75.0; /* pair is an array of three non-negative floats */ /* identthresh = ajFloatGet(pair,0); Unused */ /* simthresh = ajFloatGet(pair,1); Unused */ /* relthresh = ajFloatGet(pair,2); Unused */ /* ** shade is a formatted 4-character string. Characters BLPW only. ** controlled by a pattern in ACD. */ if(ajStrGetLen(shade)) { AJCNEW(shadecolour,4); cptr = ajStrGetPtr(shade); for(i=0;i<4;i++){ if(cptr[i]== 'B' || cptr[i]== 'b') shadecolour[i] = BLACK; else if(cptr[i]== 'L' || cptr[i]== 'l') shadecolour[i] = BROWN; else if(cptr[i]== 'P' || cptr[i]== 'p') shadecolour[i] = WHEAT; else if(cptr[i]== 'W' || cptr[i]== 'w') shadecolour[i] = WHITE; } colourbyconsensus = colourbyresidues = ajFalse; colourbyshade = ajTrue; } /* ** we can colour by consensus or residue but not both ** if we have to choose, use the consensus */ if(colourbyconsensus && colourbyresidues) colourbyconsensus = AJFALSE; sidentity = ajAcdGetString("cidentity"); ssimilarity = ajAcdGetString("csimilarity"); sother = ajAcdGetString("cother"); if(colourbyconsensus) { cidentity = ajGraphicsCheckColourS(sidentity); if(cidentity == -1) cidentity = RED; csimilarity = ajGraphicsCheckColourS(ssimilarity); if(csimilarity == -1) csimilarity = GREEN; cother = ajGraphicsCheckColourS(sother); if(cother == -1) cother = BLACK; } else if(colourbyresidues) { matcodes = ajMatrixGetCodes(cmpmatrix); if(ajSeqsetIsProt(seqset)) colmat = ajGraphicsBasecolourNewProt(matcodes); else colmat = ajGraphicsBasecolourNewNuc(matcodes); } /* output the options used as the subtitle for the bottom of the graph */ if(listoptions) { ajStrAssignC(&options,""); ajFmtPrintAppS(&options,"-plurality %.1f",fplural); if(collision) ajStrAppendC(&options," -collision"); else ajStrAppendC(&options," -nocollision"); if(boxit) ajStrAppendC(&options," -box"); else ajStrAppendC(&options," -nobox"); if(boxcol) ajStrAppendC(&options," -boxcol"); else ajStrAppendC(&options," -noboxcol"); if(colourbyconsensus) ajStrAppendC(&options," -colbyconsensus"); else if(colourbyresidues) ajStrAppendC(&options," -colbyresidues"); else if(colourbyshade) ajStrAppendC(&options," -colbyshade"); else ajStrAppendC(&options," -nocolour"); if(alternative==2) ajStrAppendC(&options," -alt 2"); else if(alternative==1) ajStrAppendC(&options," -alt 1"); else if(alternative==3) ajStrAppendC(&options," -alt 3"); } AJCNEW(seqcolptr, numseq); for(i=0;i<numseq;i++) AJCNEW(seqcolptr[i], ajSeqsetGetLen(seqset)); AJCNEW(seqcharptr, numseq); AJCNEW(seqnames, numseq); AJCNEW(score, numseq); AJCNEW(previous, numseq); AJCNEW(seqcount, numseq); for(i=0;i<numseq;i++) { ajSeqsetFmtUpper(seqset); seqcharptr[i] = ajSeqsetGetseqSeqC(seqset, i); seqnames[i] = 0; ajStrAppendS(&seqnames[i],ajSeqsetGetseqNameS(seqset, i)); ajStrTruncateLen(&seqnames[i],charlen); previous[i] = 0; seqcount[i] = 0; } /* ** user will pass the number of residues to fit a page ** therefore we now need to calculate the size of the chars ** based on this and get the new char width. ** 'charlen' maximum characters for the name (truncated above) */ ajGraphicsGetCharsize(&defheight,¤tscale); xmin = -charlen - (float)2.0; xmax = (float)numres+(float)11.0+(float)(numres/resbreak); xmid = (xmax + xmin)/(float)2.0; ajGraphOpenWin(graph, xmin, xmax, (float)0.0, ystart+(float)1.0); ajGraphGetParamsPage(graph, &fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff); if(portrait) { itmp = ixlen; ixlen = iylen; iylen = itmp; } ajGraphicsGetCharsize(&defheight,¤tscale); ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen+1)* (currentscale * (float) 1.5)))/ currentscale); /* ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen)* (currentscale+(float)1.0)))/ currentscale); */ ajGraphicsGetCharsize(&defheight,¤tscale); yincr = (currentscale + (float)3.0)*(float)0.3; /* ** If we have titles (now the standard graph title and subtitle and footer) ** leave 7 rows of space for them */ y=ystart-(float)7.0; if(ajStrGetLen(options)) { fold = ajGraphicsSetCharscale(1.0); ajGraphicsDrawposTextAtmid(xmid,2.0, ajStrGetPtr(options)); ajGraphicsSetCharscale(fold); } /* if sequences per page not set then calculate it */ if(!seqperpage) { seqperpage = prettyplot_calcseqperpage(yincr,y,consensus); if(seqperpage>numseq) seqperpage=numseq; } count = 0; /* ** for boxes we need to set a foreground colour for the box lines ** and save the current foreground colour */ if(boxit && boxcol) oldfg = ajGraphicsSetFgcolour(iboxcolval); /* ** step through each residue position */ kmax = ajSeqsetGetLen(seqset) - 1; for(k=0; k<= kmax; k++) { /* reset column score array */ for(i=0;i<numseq;i++) score[i] = 0.0; /* reset matrix character testing arrays */ for(i=0;i<matsize;i++) { identical[i] = 0.0; matching[i] = 0.0; colcheck[i] = 0.0; } /* generate a score for this residue in each sequence */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2) score[i] += (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset, j); } if(m1) identical[m1] += ajSeqsetGetseqWeight(seqset, i); } /* find the highest score */ highindex = -1; scoremax = INT_MIN; /*ajDebug("Scores at position %d:\n", k);*/ for(i=0;i<numseq;i++) { /*ajDebug(" seq %d: '%c' %f\n",i,seqcharptr[i][k],score[i]);*/ if(score[i] > scoremax) { scoremax = score[i]; highindex = i; } } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(!matching[m1]) { for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2 && matrix[m1][m2] > 0) matching[m1] += ajSeqsetGetseqWeight(seqset, j); } } } /* find highs for matching and identical */ matchingmaxindex = 0; identicalmaxindex = 0; for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] > identical[identicalmaxindex]) identicalmaxindex = m1; } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(matching[m1] > matching[matchingmaxindex]) matchingmaxindex = m1; else if(matching[m1] == matching[matchingmaxindex]) { if(identical[m1] > identical[matchingmaxindex]) matchingmaxindex= m1; } } iscons = ajFalse; boxindex = -1; max = -3; ajDebug("k:%2d highindex:%2d matching:%4.2f\n", k, highindex, matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]); if(highindex != -1 && matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural) { iscons = ajTrue; boxindex = highindex; } else { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(matching[m1] > max) { max = matching[m1]; highindex = i; } else if(matching[m1] == max) { if(identical[m1] > identical[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] ) { max = matching[m1]; highindex = i; } } } if(matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural) { iscons = ajTrue; boxindex = highindex; } } if(iscons) { if(!collision) { /* check for collisions */ if(alternative == 1) { /* check to see if this is unique for collisions */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] >= identical[identicalmaxindex] && m1 != identicalmaxindex) iscons = ajFalse; } /*ajDebug("after (alt=1) iscons: %B",iscons);*/ } else if(alternative == 2) { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if((matching[m1] >= matching[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] < 0.1)|| (identical[m1] >= identical[matchingmaxindex] && m1 != matchingmaxindex)) iscons = ajFalse; } } else if(alternative == 3) { /* ** to do this check one is NOT in consensus to see if ** another score of fplural has been found */ ms = ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k]); for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(ms != m1 && colcheck[m1] == 0.0) /* NOT in the current consensus */ for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if( matrix[ms][m2] < 0.1) { /* NOT in the current consensus */ if( matrix[m1][m2] > 0.1) colcheck[m1] += ajSeqsetGetseqWeight(seqset, j); } } } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); /* if any other matches then we have a collision */ if(colcheck[m1] >= fplural) iscons = ajFalse; } /*ajDebug("after alt=2 iscons: %B", iscons);*/ } else { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if((matching[m1] >= matching[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] < 0.1)) iscons = ajFalse; if(identical[m1] >= identical[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] > 0.1) iscons = ajFalse; } if(!iscons) { /* matches failed try identicals */ if(identical[identicalmaxindex] >= fplural) { iscons = ajTrue; /* ** if nothing has an equal or higher match that ** does not match highest then false */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] >= identical[identicalmaxindex] && m1 != identicalmaxindex) iscons = ajFalse; else if(matching[m1] >= matching[identicalmaxindex] && matrix[m1][matchingmaxindex] <= 0.0) iscons = ajFalse; else if(m1 == identicalmaxindex) j = i; } if(iscons) highindex = j; } } } } if(identity) { j = 0; for(i=0;i<numseq;i++) if(seqcharptr[highindex][k] == seqcharptr[i][k]) j++; if(j<identity) iscons = ajFalse; } } /* ** Done a full line of residues ** Boxes have been defined up to this point */ if(count >= numres ) { /* check y position for next set */ y=y-(yincr*((float)numseq+(float)2.0+((float)consensus*(float)2))); if(y<yincr*((float)numseq+(float)2.0+((float)consensus*(float)2))) { /* full page - print it */ y=ystart-(float)6.0; startseq = 0; endseq = seqperpage; newILstart = newILend; newILend = k; while(startseq < numseq) { /* AJB */ /*if(startseq != 0) ajGraphNewpage(graph, AJFALSE);*/ /*ajDebug("Inner loop: startseq: %d numseq: %d endseq: %d\n", startseq, numseq, endseq);*/ if(endseq>numseq) endseq=numseq; prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset), startseq,endseq, newILstart,newILend, numres,resbreak, boxit,boxcol,consensus, ystart,yincr,cvt); startseq = endseq; endseq += seqperpage; ajGraphNewpage(graph, AJFALSE); } } count = 0; gapcount = 0; } count++; countforgap++; for(j=0;j<numseq;j++) { /* START OF BOXES */ if(boxit) { seqboxptr[j][k] = 0; if(boxindex!=-1) { myindex = boxindex; if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0) part = 1.0; else { if(identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >= fplural) part = 1.0; else part = 0.0; } if(previous[j] != part) /* draw vertical line */ seqboxptr[j][k] |= BOXLEF; if(j==0) { /* special case for horizontal line */ if(part) { currentstate = 1; /* draw hori line */ seqboxptr[j][k] |= BOXTOP; } else currentstate = 0; } else { /* j != 0 Normal case for horizontal line */ if(part != currentstate) { /*draw hori line */ seqboxptr[j][k] |= BOXTOP; currentstate = (ajint) part; } } if(j== numseq-1 && currentstate) /* draw horiline at bottom */ seqboxptr[j][k] |= BOXBOT; previous[j] = (ajint) part; } else { part = 0; if(previous[j]) { /* draw vertical line */ seqboxptr[j][k] |= BOXLEF; } previous[j] = 0; } if(count == numres || k == kmax || countforgap >= resbreak ) { /* last one on the row or a break*/ if(previous[j]) { /* draw vertical line */ seqboxptr[j][k] |= BOXRIG; } previous[j] = 0; } } /* end box */ if(boxit && boxcol) if(boxindex != -1) { myindex = boxindex; if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0 || identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >= fplural ) seqboxptr[j][k] |= BOXCOLOURED; } /* END OF BOXES */ if(ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])) res[0] = seqcharptr[j][k]; else res[0] = '-'; if(colourbyconsensus) { part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]; if(iscons && seqcharptr[highindex][k] == seqcharptr[j][k]) seqcolptr[j][k] = cidentity; else if(part > 0.0) seqcolptr[j][k] = csimilarity; else seqcolptr[j][k] = cother; } else if(colourbyresidues) seqcolptr[j][k] = colmat[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]; else if(iscons && colourbyshade) { part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]; if(part >= 1.5) seqcolptr[j][k] = shadecolour[0]; else if(part >= 1.0) seqcolptr[j][k] = shadecolour[1]; else if(part >= 0.5) seqcolptr[j][k] = shadecolour[2]; else seqcolptr[j][k] = shadecolour[3]; } else if(colourbyshade) seqcolptr[j][k] = shadecolour[3]; else seqcolptr[j][k] = BLACK; } if(consensus) { if(iscons) res[0] = seqcharptr[highindex][k]; else res[0] = '-'; strcat(constr,res); } if(countforgap >= resbreak) { gapcount++; countforgap=0; } } startseq = 0; endseq=seqperpage; newILstart = newILend; newILend = k; while(startseq < numseq) { if(startseq) ajGraphNewpage(graph, AJFALSE); /*ajDebug("Final loop: startseq: %d numseq: %d endseq: %d\n", startseq, numseq, endseq);*/ if(endseq>numseq) endseq = numseq; prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset), startseq,endseq, newILstart,newILend, numres,resbreak, boxit,boxcol,consensus, ystart,yincr,cvt); startseq = endseq; endseq += seqperpage; } ajGraphicsGetCharsize(&defheight,¤tscale); if(boxit && boxcol) oldfg = ajGraphicsSetFgcolour(oldfg); ajGraphicsCloseWin(); ajGraphxyDel(&graph); ajStrDel(&sidentity); ajStrDel(&ssimilarity); ajStrDel(&sother); ajStrDel(&options); ajStrDel(&altstr); ajStrDel(&matcodes); for(i=0;i<numseq;i++) { ajStrDel(&seqnames[i]); AJFREE(seqcolptr[i]); if(seqboxptr) AJFREE(seqboxptr[i]); } AJFREE(seqcolptr); AJFREE(seqboxptr); AJFREE(seqnames); AJFREE(score); AJFREE(previous); AJFREE(seqcount); AJFREE(colmat); AJFREE(shadecolour); freeptr = (void *) seqcharptr; AJFREE(freeptr); AJFREE(identical); AJFREE(matching); AJFREE(colcheck); ajSeqsetDel(&seqset); ajMatrixDel(&cmpmatrix); ajStrDel(&shade); ajStrDel(&sboxcolval); ajStrDel(&sidentity); ajStrDel(&ssimilarity); ajStrDel(&sother); ajFloatDel(&pair); ajTimeDel(&ajtime); AJFREE(constr); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset = NULL; AjPStr refseq; /* input name/number of reference sequence */ ajint nrefseq; /* numeric reference sequence */ AjPMatrix matrix; /* scoring matrix structure */ ajint **sub; /* integer scoring matrix */ AjPSeqCvt cvt = 0; /* conversion table for scoring matrix */ float identity; ajint ident; float fplural; AjPStr cons; AjPSeq consensus; const AjPSeq ref; const AjPSeq seq; ajuint i; AjBool html; AjBool doheader; AjBool dousa; AjBool doname; AjBool doseqlength; AjBool doalignlength; AjBool dogaps; AjBool dogapcount; AjBool doidcount; AjBool dosimcount; AjBool dodifcount; AjBool dochange; AjBool dodesc; AjBool dowt; ajint seqlength; ajint alignlength; ajint gaps; ajint gapcount; ajint idcount; ajint simcount; ajint difcount; float change; AjPFile outfile; const AjPStr usa; const AjPStr name; AjPStr altusa; /* default name when the real name is not known */ AjPStr altname; AjPStr xxx = NULL; embInit("infoalign", argc, argv); seqset = ajAcdGetSeqset("sequence"); refseq = ajAcdGetString("refseq"); matrix = ajAcdGetMatrix("matrix"); ajSeqsetFill(seqset); outfile = ajAcdGetOutfile("outfile"); html = ajAcdGetBoolean("html"); doheader = ajAcdGetBoolean("heading"); dousa = ajAcdGetBoolean("usa"); doname = ajAcdGetBoolean("name"); doseqlength = ajAcdGetBoolean("seqlength"); doalignlength = ajAcdGetBoolean("alignlength"); dogaps = ajAcdGetBoolean("gaps"); dogapcount = ajAcdGetBoolean("gapcount"); doidcount = ajAcdGetBoolean("idcount"); dosimcount = ajAcdGetBoolean("simcount"); dodifcount = ajAcdGetBoolean("diffcount"); dochange = ajAcdGetBoolean("change"); dodesc = ajAcdGetBoolean("description"); dowt = ajAcdGetBoolean("weight"); /* consensus parameters */ fplural = ajAcdGetFloat("plurality"); identity = ajAcdGetFloat("identity"); cons = ajStrNew(); consensus = ajSeqNew(); altusa = ajStrNewC("-"); altname = ajStrNewC("-"); /* get conversion table and scoring matrix */ cvt = ajMatrixGetCvt(matrix); sub = ajMatrixGetMatrix(matrix); /* get the number of the reference sequence */ nrefseq = infoalign_Getrefseq(refseq, seqset); /* change the % plurality to the fraction of absolute total weight */ fplural = ajSeqsetGetTotweight(seqset) * fplural / 100; /* ** change the % identity to the number of identical sequences at a ** position required for consensus */ ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100; /* get the consensus sequence */ embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset), fplural, 0.0, ident, ajFalse, &cons); ajSeqAssignSeqS(consensus, cons); ajSeqAssignNameS(consensus,(xxx=ajStrNewC("Consensus"))); /* get the reference sequence */ if(nrefseq == -1) ref = consensus; else ref = ajSeqsetGetseqSeq(seqset, nrefseq); /* start the HTML table */ if(html) ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor=" "\"#FFFFF0\">\n"); /* print the header information */ if(doheader) { /* start the HTML table title line and output the Name header */ if(html) ajFmtPrintF(outfile, "<tr>"); else ajFmtPrintF(outfile, "%s", "# "); if(dousa) { if(html) ajFmtPrintF(outfile, "<th>USA</th>"); else ajFmtPrintF(outfile, "%-16s", "USA"); } if(doname) { if(html) ajFmtPrintF(outfile, "<th>Name</th>"); else ajFmtPrintF(outfile, "%-12s", "Name"); } if(doseqlength) { if(html) ajFmtPrintF(outfile, "<th>Sequence Length</th>"); else ajFmtPrintF(outfile, "SeqLen\t"); } if(doalignlength) { if(html) ajFmtPrintF(outfile, "<th>Aligned Length</th>"); else ajFmtPrintF(outfile, "AlignLen\t"); } if(dogaps) { if(html) ajFmtPrintF(outfile, "<th>Gaps</th>"); else ajFmtPrintF(outfile, "Gaps\t"); } if(dogapcount) { if(html) ajFmtPrintF(outfile, "<th>Gap Length</th>"); else ajFmtPrintF(outfile, "GapLen\t"); } if(doidcount) { if(html) ajFmtPrintF(outfile, "<th>Identity</th>"); else ajFmtPrintF(outfile, "Ident\t"); } if(dosimcount) { if(html) ajFmtPrintF(outfile, "<th>Similarity</th>"); else ajFmtPrintF(outfile, "Similar\t"); } if(dodifcount) { if(html) ajFmtPrintF(outfile, "<th>Difference</th>"); else ajFmtPrintF(outfile, "Differ\t"); } if(dochange) { if(html) ajFmtPrintF(outfile, "<th>%% Change</th>"); else ajFmtPrintF(outfile, "%% Change\t"); } if(dowt) { if(html) ajFmtPrintF(outfile, "<th>Weight</th>"); else ajFmtPrintF(outfile, "Weight\t"); } if(dodesc) { if(html) ajFmtPrintF(outfile, "<th>Description</th>"); else ajFmtPrintF(outfile, "Description"); } /* end the HTML table title line */ if(html) ajFmtPrintF(outfile, "</tr>\n"); else ajFmtPrintF(outfile, "\n"); } for(i=0; i<ajSeqsetGetSize(seqset); i++) { seq = ajSeqsetGetseqSeq(seqset, i); /* get the usa ('-' if unknown) */ usa = ajSeqGetUsaS(seq); if(ajStrGetLen(usa) == 0) usa = altusa; /* get the name ('-' if unknown) */ name = ajSeqGetNameS(seq); if(ajStrGetLen(name) == 0) name = altname; /* get the stats from the comparison to the reference sequence */ infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength, &gaps, &gapcount, &idcount, &simcount, &difcount, &change); /* start table line */ if(html) ajFmtPrintF(outfile, "<tr>"); if(dousa) infoalign_OutputStr(outfile, usa, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doseqlength || doalignlength || doname), 18); if(doname) infoalign_OutputStr(outfile, name, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doseqlength || doalignlength), 14); if(doseqlength) infoalign_OutputInt(outfile, seqlength, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doalignlength)); if(doalignlength) infoalign_OutputInt(outfile, alignlength, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps)); if(dogaps) infoalign_OutputInt(outfile, gaps, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount)); if(dogapcount) infoalign_OutputInt(outfile, gapcount, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount)); if(doidcount) infoalign_OutputInt(outfile, idcount, html, (dodesc || dowt || dochange || dodifcount || dosimcount)); if(dosimcount) infoalign_OutputInt(outfile, simcount, html, (dodesc || dowt || dochange || dodifcount)); if(dodifcount) infoalign_OutputInt(outfile, difcount, html, (dodesc || dowt || dochange)); if(dochange) infoalign_OutputFloat(outfile, change, html, (dodesc || dowt) ); if(dowt) infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,i), html, dodesc); if(dodesc) infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse, NOLIMIT); /* end table line */ if(html) ajFmtPrintF(outfile, "</tr>\n"); else ajFmtPrintF(outfile, "\n"); } /* end the HTML table */ if(html) ajFmtPrintF(outfile, "</table>\n"); ajFileClose(&outfile); /* tidy up */ ajStrDel(&altusa); ajStrDel(&altname); ajStrDel(&xxx); ajSeqDel(&consensus); ajSeqsetDel(&seqset); ajStrDel(&refseq); ajMatrixDel(&matrix); ajStrDel(&cons); embExit(); return 0; }
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info) { ajint n; ajint maxlen; ajint len; char **seqs; const AjPSeq seq = NULL; ajint i=0; const AjPStr fmt=NULL; const char *p=NULL; char c='\0'; /* char *q=NULL; AjPSelexseq sqdata=NULL; AjPSelexdata sdata=NULL; */ ajint cnt=0; info->name = NULL; info->rf=NULL; info->cs=NULL; info->desc=NULL; info->acc=NULL; info->au=NULL; info->flags=0; AjPStr tmpstr = NULL; ajSeqsetFill(seqset); fmt = ajSeqsetGetFormat(seqset); n = ajSeqsetGetSize(seqset); ajSeqsetFmtUpper(seqset); maxlen = ajSeqsetGetLen(seqset); /* First allocate and copy sequences */ AJCNEW0(seqs,n); for(i=0; i<n; ++i) { seqs[i] = ajCharNewRes(maxlen+1); strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i))); } info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n); for(i=0; i<n; ++i) { info->sqinfo[i].flags = 0; strcpy(info->sqinfo[i].name,""); strcpy(info->sqinfo[i].id,""); strcpy(info->sqinfo[i].acc,""); strcpy(info->sqinfo[i].desc,""); info->sqinfo[i].len = 0; info->sqinfo[i].start = 0; info->sqinfo[i].stop = 0; info->sqinfo[i].olen = 0; info->sqinfo[i].type = 0; info->sqinfo[i].ss = NULL; info->sqinfo[i].sa =NULL; } AJCNEW0(info->wgt,n); for(i=0; i<n; ++i) { info->sqinfo[i].flags = 0; info->wgt[i] = ajSeqsetGetseqWeight(seqset,i); } info->nseq = n; info->alen = maxlen; for(i=0; i<n; ++i) { seq = ajSeqsetGetseqSeq(seqset,i); if((len=ajStrGetLen(ajSeqGetNameS(seq)))) { if(len>= SQINFO_NAMELEN) len = SQINFO_NAMELEN - 1; ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len); strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr)); info->sqinfo[i].flags |= SQINFO_ID; strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr)); info->sqinfo[i].flags |= SQINFO_NAME; } if((len=ajStrGetLen(ajSeqGetAccS(seq)))) { if(len>= SQINFO_NAMELEN) len = SQINFO_NAMELEN - 1; ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len); strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr)); info->sqinfo[i].flags |= SQINFO_ACC; } } seq = ajSeqsetGetseqSeq(seqset,0); info->cs = ajCharNewS(ajSeqGetSeqS(seq)); info->name = ajCharNewS(ajSeqGetNameS(seq)); info->acc = ajCharNewS(ajSeqGetAccS(seq)); info->desc = ajCharNewS(ajSeqGetDescS(seq)); info->rf = ajCharNewS(ajSeqGetSeqS(seq)); /* info->rf = ajCharNewS(seq); len = ajStrGetLen(seq->Selexdata->name); info->name = ajCharNewRes(len+1); strcpy(info->name,ajStrGetPtr(seq->Selexdata->name)); len = ajStrGetLen(seq->Selexdata->de); info->desc = ajCharNewRes(len+1); sdata = seq->Selexdata; strcpy(info->desc,ajStrGetPtr(sdata->de)); len = ajStrGetLen(sdata->ac); info->acc = ajCharNewRes(len+1); strcpy(info->acc,ajStrGetPtr(sdata->ac)); len = ajStrGetLen(sdata->au); info->au = ajCharNewRes(len+1); strcpy(info->au,ajStrGetPtr(sdata->au)); if(sdata->tc[0] || sdata->tc[1]) { info->flags |= AINFO_TC; info->tc1 = sdata->tc[0]; info->tc2 = sdata->tc[1]; } if(sdata->nc[0] || sdata->nc[1]) { info->flags |= AINFO_NC; info->nc1 = sdata->nc[0]; info->nc2 = sdata->nc[1]; } if(sdata->ga[0] || sdata->ga[1]) { info->flags |= AINFO_GA; info->ga1 = sdata->ga[0]; info->ga2 = sdata->ga[1]; } for(i=0;i<n;++i) { seq = ajSeqsetGetseqSeq(seqset,i); sqdata = seq->Selexdata->sq; if((len=ajStrGetLen(sqdata->name))) { if(len<64) strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name)); else strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63); info->sqinfo[i].name[63]='\0'; info->sqinfo[i].flags |= SQINFO_NAME; } / * if((len=ajStrGetLen(sqdata->id))) { if(len<64) strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id)); else strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63); info->sqinfo[i].id[63]='\0'; info->sqinfo[i].flags |= SQINFO_ID; } * / strcpy(info->sqinfo[i].id,info->sqinfo[i].name); info->sqinfo[i].flags |= SQINFO_ID; if((len=ajStrGetLen(sqdata->ac))) { if(len<64) strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac)); else strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63); info->sqinfo[i].acc[63]='\0'; info->sqinfo[i].flags |= SQINFO_ACC; } if((len=ajStrGetLen(sqdata->de))) { if(len<127) strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de)); else strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127); info->sqinfo[i].desc[127]='\0'; info->sqinfo[i].flags |= SQINFO_DESC; } if(sqdata->start || sqdata->stop || sqdata ->len) { info->sqinfo[i].start = sqdata->start; info->sqinfo[i].stop = sqdata->stop; info->sqinfo[i].olen = sqdata->len; info->sqinfo[i].flags |= SQINFO_START; info->sqinfo[i].flags |= SQINFO_STOP; info->sqinfo[i].flags |= SQINFO_OLEN; } if(ajStrGetLen(seq->Selexdata->ss)) { info->sqinfo[i].ss = ajCharNewRes(maxlen+1); p = ajStrGetPtr(seq->Selexdata->ss); q = info->sqinfo[i].ss; while((c==*p)) { if(c=='.' || c==' ' || c=='_' || c=='-') *q++ = c; ++p; } *q = '\0'; info->sqinfo[i].flags |= SQINFO_SS; } } } / * } */ for(i=0; i<n; ++i) { info->sqinfo[i].type = kOtherSeq; if(ajSeqsetIsDna(seqset)) info->sqinfo[i].type = kDNA; if(ajSeqsetIsRna(seqset)) info->sqinfo[i].type = kRNA; if(ajSeqsetIsProt(seqset)) info->sqinfo[i].type = kAmino; info->sqinfo[i].flags |= SQINFO_TYPE; seq = ajSeqsetGetseqSeq(seqset,i); p = ajSeqGetSeqC(seq); cnt = 0; while((c=*p)) { if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~')) ++cnt; ++p; } info->sqinfo[i].len = cnt; info->sqinfo[i].flags |= SQINFO_LEN; } *retseqs = seqs; ajStrDel(&tmpstr); return; }