コード例 #1
0
/*
** ajtest entry point.
**
** Reads a sequence stream, a sequence set, an output directory name, an
** OBO file, a taxonomy directory and a resource (dbxref) file from ACD,
** reports the directory and the set size, hands every sequence of the
** stream to ajtest_kim() with a numbered "kimN.out" name, and finally
** feeds the optional taxonomy directory / OBO file / resource file to
** their respective loaders before exiting.
*/
int main(int argc, char **argv)
{
    AjPSeqset bset;
    AjPSeqall stream;
    AjPSeq current;
    ajint nseen = 0;
    AjPStr kimname = NULL;
    AjPStr outdir = NULL;
    AjPFile obo = NULL;
    AjPFile dbxref = NULL;
    AjPDir taxonomy = NULL;

    embInit("ajtest", argc, argv);

    stream   = ajAcdGetSeqall("sequence");
    bset     = ajAcdGetSeqset("bsequence");
    outdir   = ajAcdGetOutdirName("outdir");
    obo      = ajAcdGetInfile("obofile");
    taxonomy = ajAcdGetDirectory("taxdir");
    dbxref   = ajAcdGetInfile("dbxreffile");

    ajUser("Directory '%S'", outdir);
    ajUser("Set of %d", ajSeqsetGetSize(bset));

    while(ajSeqallNext(stream, &current))
    {
        /* The report shows a zero-based counter ... */
        ajUser("%3d <%S>", nseen, ajSeqGetUsaS(current));
        nseen++;

        /* ... while the output file name uses the one-based counter */
        ajFmtPrintS(&kimname, "kim%d.out", nseen);
        ajtest_kim(kimname, current);
    }

    /* Release the sequence inputs and the scratch strings */
    ajSeqDel(&current);
    ajSeqallDel(&stream);
    ajSeqsetDel(&bset);
    ajStrDel(&kimname);
    ajStrDel(&outdir);

    /* Optional inputs: only loaded/parsed when ACD supplied them */
    if(taxonomy)
    {
        ajTaxLoad(taxonomy);
    }
    ajDirDel(&taxonomy);

    if(obo)
    {
        ajOboParseObofile(obo, "");
    }
    ajFileClose(&obo);

    if(dbxref)
    {
        ajResourceParse(dbxref, "");
    }
    ajFileClose(&dbxref);

    embExit();

    return 0;
}
コード例 #2
0
ファイル: infoalign.c プロジェクト: WenchaoLin/JAMg
static int infoalign_Getrefseq(const AjPStr refseq, const AjPSeqset seqset)
{
    ajint i;
    const AjPSeq seq;

    for(i=0; i<(ajint)ajSeqsetGetSize(seqset); i++)
    {
	seq = ajSeqsetGetseqSeq(seqset, i);
	if(!ajStrCmpS(ajSeqGetNameS(seq), refseq))
	    return i;
    }

    /* not a name of a sequence, so it must be a number */
    if(!ajStrToInt(refseq, &i))
	ajFatal("Reference sequence is not a sequence ID or a number: %S",
		refseq);

    if(i < 0 || i > (ajint) ajSeqsetGetSize(seqset))
	ajFatal("Reference sequence number < 0 or > number "
		"of input sequences: %d", i);

    return i-1;
}
コード例 #3
0
/* @funcstatic skipredundant_SeqsetToList **************************************
**
** Appends one list node per sequence of a sequence set, in set order.
** Each node is the object returned by embDmxNrseqNew() for that sequence.
** NOTE(review): whether embDmxNrseqNew() copies the sequence or only keeps
** a reference is not visible here - confirm before relying on either.
**
** @param [u] list   [AjPList] List to append to
** @param [r] seqset [AjPSeqset] Sequence set to read from
** @return [AjBool] False if either argument is NULL, true otherwise
******************************************************************************/
static AjBool skipredundant_SeqsetToList (AjPList list, AjPSeqset seqset)
{
    ajint total;
    ajint idx;

    /* Both the destination list and the source set are mandatory */
    if(!(list && seqset))
    {
        return ajFalse;
    }

    total = ajSeqsetGetSize(seqset);

    for(idx = 0; idx < total; idx++)
    {
        ajListPushAppend(list,
                         embDmxNrseqNew(ajSeqsetGetseqSeq(seqset, idx)));
    }

    return ajTrue;
}
コード例 #4
0
ファイル: sizeseq.c プロジェクト: WenchaoLin/JAMg
/*
** sizeseq entry point.
**
** Sorts the input sequence set with ajSeqsetSortLen() and writes every
** sequence to the output stream, walking the sorted set from the last
** entry to the first when "descending" is set and from first to last
** otherwise.
*/
int main(int argc, char **argv)
{
    AjPSeqset input  = NULL;
    AjPSeqout output = NULL;
    AjBool    descending;
    ajuint    total;
    ajuint    done;
    ajuint    pick;

    /* ACD processing */
    embInit("sizeseq", argc, argv);
    input      = ajAcdGetSeqset("sequences");
    descending = ajAcdGetBoolean("descending");
    output     = ajAcdGetSeqoutall("outseq");

    /* Sort, then emit in the requested direction */
    ajSeqsetSortLen(input);
    total = ajSeqsetGetSize(input);

    for(done = 0; done < total; done++)
    {
        /* Descending output reads the sorted set back to front */
        if(descending)
        {
            pick = total - 1 - done;
        }
        else
        {
            pick = done;
        }

        ajSeqoutWriteSeq(output, ajSeqsetGetseqSeq(input, pick));
    }

    /* Clean up and leave */
    ajSeqsetDel(&input);
    ajSeqoutClose(output);
    ajSeqoutDel(&output);

    embExit();

    return 0;
}
コード例 #5
0
/*
** skipredundant entry point.
**
** Reads a sequence set and the redundancy parameters from ACD, builds a
** list from the set, and asks embDmxSeqNR() (mode 1, single threshold) or
** embDmxSeqNRRange() (mode 2, threshold range) to fill the 'keep' array.
** Sequences whose 'keep' entry is non-zero are written to "outseq"; the
** others are written to "redundantoutseq" when that output was supplied.
**
** Any mode other than 1 or 2 - including a mode string that cannot be
** parsed as an integer - is fatal.
*/
int main(int argc, char **argv)
{
    /* Variable Declarations */
    AjPSeqset  seqset    = NULL;
    AjPMatrixf fmat      = NULL;
    float      thresh;
    float      threshlow;
    float      threshup;
    float      gapopen;
    float      gapextend;
    AjPSeqout  seqout    = NULL;
    AjPSeqout  seqoutred = NULL;
    AjPStr     mode      = NULL;
    ajint      moden     = 0;      /* 0 is never a valid mode            */
    ajuint i;


    /* toggle "feature" from ACD not retrieved ... no need */

    const AjPSeq seq    = NULL;
    AjPList      list   = NULL;    /* List for redundancy removal.       */
    AjPUint      keep   = NULL;    /* 1: Sequence in list was non-redundant,
                                      0: redundant.    */
    ajuint       nseq   = 0;       /* No. seqs. in list.                 */
    ajint        nseqnr = 0;       /* No. non-redundant seqs. in list.   */

    /* ACD File Processing */
    embInit("skipredundant", argc, argv);
    seqset        = ajAcdGetSeqset("sequences");
    mode          = ajAcdGetListSingle("mode");
    fmat          = ajAcdGetMatrixf("datafile");
    thresh        = ajAcdGetFloat("threshold");
    threshlow     = ajAcdGetFloat("minthreshold");
    threshup      = ajAcdGetFloat("maxthreshold");
    gapopen       = ajAcdGetFloat("gapopen");
    gapextend     = ajAcdGetFloat("gapextend");
    seqout        = ajAcdGetSeqoutall("outseq");
    seqoutred     = ajAcdGetSeqoutall("redundantoutseq");



    /* Application logic */
    list    = ajListNew();
    skipredundant_SeqsetToList(list, seqset);
    keep = ajUintNew();

    /*
    ** Do not trust moden after a failed conversion: force it to the
    ** invalid value so the dispatch below reports the bad mode instead
    ** of acting on an indeterminate number.
    */
    if(!ajStrToInt(mode, &moden))
	moden = 0;


    if(moden == 1)
      /* Remove redundancy at a single threshold % sequence similarity */
      {
	if((!embDmxSeqNR(list, &keep, &nseqnr, fmat, gapopen,
			 gapextend, thresh, ajFalse)))
	  ajFatal("embDmxSeqNR unexpected failure!");
      }
    else if (moden == 2)
      /* 2: Remove redundancy outside a range of acceptable threshold % similarity */
      {
	if((!embDmxSeqNRRange(list, &keep, &nseqnr, fmat, gapopen,
			      gapextend, threshlow, threshup, ajFalse)))
	  ajFatal("embDmxSeqNRRange unexpected failure!");
      }
    else
      ajFatal("Invalid mode (not 1 or 2) which should never occur (check ACD file!)");

    /* keep[i] corresponds to the i'th sequence of the set */
    nseq = ajSeqsetGetSize(seqset);
    for(i=0; i<nseq; i++)
      {
	seq = ajSeqsetGetseqSeq(seqset, i);

	if(ajUintGet(keep, i))
	  ajSeqoutWriteSeq(seqout, seq);
	else if(seqoutred)
	  ajSeqoutWriteSeq(seqoutred, seq);
      }

    /* Memory management and exit */
    ajSeqsetDel(&seqset);
    ajMatrixfDel(&fmat);
    ajStrDel(&mode);
    ajSeqoutClose(seqout);
    ajSeqoutDel(&seqout);
    if(seqoutred)
    {
	ajSeqoutClose(seqoutred);
	ajSeqoutDel(&seqoutred);
    }
    skipredundant_ClearList(list);

    ajListFree(&list);
    ajUintDel(&keep);

    embExit();

    return 0;
}
コード例 #6
0
ファイル: listor.c プロジェクト: WenchaoLin/JAMg
/*
** listor entry point.
**
** Reads two sequence sets, an output file and a logical operator from
** ACD. For each set it builds three parallel arrays: the sequence
** lengths, an index array ordered through ajSortIntIncI(), and a 'hits'
** array initialised to -1. The two sets are then walked in parallel in
** length order; wherever the lengths agree, the sequences of that length
** are compared pairwise with ajStrCmpCaseS() and, for every pair that
** compares equal, each side's 'hits' entry is set to the index of its
** partner in the other set. The hits arrays and the operator code are
** finally passed to listor_Output().
*/
int main(int argc, char **argv)
{
    AjPSeqset seq1;
    AjPSeqset seq2;
    AjPFile list;
    ajint n1;
    ajint n2;
    ajint *lengths1;
    ajint *lengths2;
    ajuint *order1;
    ajuint *order2;
    ajint *hits1;
    ajint *hits2;
    ajint curr1;
    ajint curr2;
    ajint tmp1;
    ajint tmp2 = 0;
    ajint i;
    AjPStr operator;
    ajint OperatorCode=0;


    embInit("listor", argc, argv);

    seq1     = ajAcdGetSeqset("firstsequences");
    seq2     = ajAcdGetSeqset("secondsequences");
    list     = ajAcdGetOutfile("outfile");
    operator = ajAcdGetListSingle("operator");

    /* get the operator value (selected by the first character only) */
    switch(ajStrGetCharFirst(operator))
    {
    case 'O':
	OperatorCode = L_OR;
	break;
    case 'A':
	OperatorCode = L_AND;
	break;
    case 'X':
	OperatorCode = L_XOR;
	break;
    case 'N':
	OperatorCode = L_NOT;
	break;
    default:
	ajFatal("Invalid operator type: %S", operator);
	embExitBad();
    }


    /* get the order of seqset 1 by length */
    n1 = ajSeqsetGetSize(seq1);

    /* lengths of seq1 entries */
    lengths1 = AJCALLOC0(n1, sizeof(ajint));

    /* seq1 entries which match seq2 */
    hits1    = AJCALLOC0(n1, sizeof(ajint));

    /* seq1 entries in length order */
    order1   = AJCALLOC0(n1, sizeof(ajint));
    for(i=0; i<n1; i++)
    {
	lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i));
	order1[i]   = i;
	hits1[i]    = -1;	/* -1 marks "no identical sequence found" */
    }
    /*
    ** NOTE(review): presumably reorders order1 so that
    ** lengths1[order1[k]] is non-decreasing in k - confirm against
    ** the ajSortIntIncI documentation.
    */
    ajSortIntIncI(lengths1, order1, n1);

    /* get the order of seqset 2 by length */
    n2 = ajSeqsetGetSize(seq2);
    lengths2 = AJCALLOC0(n2, sizeof(ajint));
    hits2    = AJCALLOC0(n2, sizeof(ajint));
    order2   = AJCALLOC0(n2, sizeof(ajint));

    for(i=0; i<n2; i++)
    {
	lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i));
	order2[i]   = i;
	hits2[i]    = -1;
    }
    ajSortIntIncI(lengths2, order2, n2);

    /*
    ** go down the two sequence sets, by size order, looking for identical
    ** lengths
    */
    curr1 = 0;
    curr2 = 0;
    while(curr1 < n1 &&  curr2 < n2)
    {
	if(lengths1[order1[curr1]] < lengths2[order2[curr2]])
	    /* seq1 is shorter - increment curr1 index */
	    curr1++;
	else if(lengths1[order1[curr1]] > lengths2[order2[curr2]])
	    /* seq2 is shorter - increment curr2 index */
	    curr2++;
	else
	{
	    /* identical lengths - check all seq1/seq2 entries of this len */
	    for(tmp1=curr1; tmp1<n1
		 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++)
		for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] ==
		    lengths2[order2[curr2]]; tmp2++)
		    /* check to see if the sequences are identical */
		    if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1,
							     order1[tmp1])),
				      ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2,
				      order2[tmp2]))))
		    {
			/* later matches overwrite earlier ones */
			hits1[order1[tmp1]] = order2[tmp2];
			hits2[order2[tmp2]] = order1[tmp1];
		    }

	    /*
	    ** Skip both cursors past the run of equal-length entries.
	    ** tmp2 holds the value left by the last pass of the inner
	    ** loop, which ends at the same position on every pass.
	    */
	    curr1 = tmp1;
	    curr2 = tmp2;
	}
    }

    /* output the required entries to the list file */
    listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2);


    AJFREE(lengths1);
    AJFREE(lengths2);
    AJFREE(order1);
    AJFREE(order2);
    AJFREE(hits1);
    AJFREE(hits2);
    ajFileClose(&list);
    ajStrDel(&operator);

    ajSeqsetDel(&seq1);
    ajSeqsetDel(&seq2);

    embExit();

    return 0;
}
コード例 #7
0
/*
** supermatcher entry point.
**
** For every sequence 'a' of the "asequence" stream a word table is built
** with embWordGetTable(); each sequence 'b' of the "bsequence" set is
** then tested with supermatcher_findstartpoints(). Pairs without start
** points are reported to the error file and skipped. For the remaining
** pairs the region start1..end1 / start2..end2 is scored with
** embAlignPathCalcSWFast(), walked back with embAlignWalkSWMatrixFast(),
** and written through the alignment report object.
**
** The 'path' and 'compass' work arrays are shared by all pairs and only
** grow: they are resized when the a-side span of a pair exceeds the
** largest span seen so far.
*/
int main(int argc, char **argv)
{
    AjPSeqall seq1;
    AjPSeqset seq2;
    AjPSeq a;
    const AjPSeq b;
    AjPStr m = 0;
    AjPStr n = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    ajint    lena = 0;
    ajint    lenb = 0;

    const char   *p;
    const char   *q;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;


    ajint begina;
    ajint i;
    ajuint k;
    ajint beginb;
    ajint start1 = 0;
    ajint start2 = 0;
    ajint end1   = 0;
    ajint end2   = 0;
    ajint width  = 0;
    AjPTable seq1MatchTable = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;

    AjPAlign align = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    seq1      = ajAcdGetSeqall("asequence");
    seq2      = ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* not the same as awidth */

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    ajSeqsetTrim(seq2);

    while(ajSeqallNext(seq1,&a))
    {
        ajSeqTrim(a);
	begina = 1 + ajSeqGetOffset(a);

	/* m receives the aligned text for 'a'; reused for every 'b' */
	m = ajStrNewRes(1+ajSeqGetLen(a));

	lena = ajSeqGetLen(a);

	ajDebug("Read '%S'\n", ajSeqGetNameS(a));

	/*
	** NOTE(review): a failure here is only reported; processing
	** continues with whatever seq1MatchTable holds - confirm that
	** this is intended.
	*/
	if(!embWordGetTable(&seq1MatchTable, a)) /* get table of words */
	    ajErr("Could not generate table for %s\n",
		  ajSeqGetNameC(a));

	for(k=0;k<ajSeqsetGetSize(seq2);k++)
	{
	    b      = ajSeqsetGetseqSeq(seq2, k);
	    lenb   = ajSeqGetLen(b);
	    beginb = 1 + ajSeqGetOffset(b);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(b));
	    p = ajSeqGetSeqC(a);
	    q = ajSeqGetSeqC(b);

	    /* n has not been allocated yet on this path, so skipping is safe */
	    if(!supermatcher_findstartpoints(seq1MatchTable,b,a,
					     &start1, &start2,
					     &end1, &end2))
	    {
		ajFmtPrintF(errorf,
			    "No wordmatch start points for "
			    "%s vs %s. No alignment\n",
			    ajSeqGetNameC(a),ajSeqGetNameC(b));
		continue;
	    }
	    
        n=ajStrNewRes(1+ajSeqGetLen(b));
        ajStrAssignC(&m,"");
        ajStrAssignC(&n,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(a), ajSeqGetNameS(b),
		    start1, start2, end1, end2);

	    /* grow the work arrays to (a-side span) * width elements */
	    if(end1-start1+1 > oldmax)
	    {
		oldmax = ((end1-start1)+1);
		AJRESIZE(path,oldmax*width*sizeof(float));
		AJRESIZE(compass,oldmax*width*sizeof(ajint));
		ajDebug("++ resize to oldmax: %d\n", oldmax);
	    }

	    /* clear only the part of path used by this pair */
	    for(i=0;i<((end1-start1)+1)*width;i++)
		path[i] = 0.0;

	    ajDebug("Calling embAlignPathCalcFast "
		     "%d..%d [%d/%d] %d..%d [%d/%d]\n",
		     start1, end1, (end1 - start1 + 1), lena,
		     start2, end2, (end2 - start2 + 1), lenb);

	    score = embAlignPathCalcSWFast(&p[start1],&q[start2],
                                           end1-start1+1,end2-start2+1,
                                           0,width,
                                           gapopen,gapextend,
                                           path,sub,cvt,
                                           compass,show);

	    /* start1/start2 are passed by address and may be updated */
	    embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,a,b,
					 &m,&n,end1-start1+1,end2-start2+1,
					 0,width,
                                         &start1,&start2);

		/* report style depends on whether the format shows sequences */
		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(m),
		            ajStrGetPtr(n), ajSeqGetNameC(a),
		            ajSeqGetNameC(b));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    embAlignReportLocal(align, a, b,
		            m,n,start1,start2,
		            gapopen, gapextend,
		            score,matrix, begina, beginb);
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    ajStrDel(&n);
	}

	embWordFreeTable(&seq1MatchTable); /* free table of words */
	seq1MatchTable=0;

	ajStrDel(&m);

    }

    /*
    ** NOTE(review): the matrix is deleted here only for formats that do
    ** not show sequences; presumably the alignment object releases it in
    ** the other case - confirm against embAlignReportLocal/ajAlignDel.
    */
    if(!ajAlignFormatShowsSequences(align))
    {
        ajMatrixfDel(&matrix);        
    }
    
    AJFREE(path);
    AJFREE(compass);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&seq1);
    ajSeqDel(&a);
    ajSeqsetDel(&seq2);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
コード例 #8
0
ファイル: infoalign.c プロジェクト: WenchaoLin/JAMg
/*
** infoalign entry point.
**
** Reads an aligned sequence set, a reference-sequence specification, a
** scoring matrix and a set of column on/off switches from ACD. A
** consensus is calculated with embConsCalc(); the reference is either
** that consensus (when infoalign_Getrefseq() returns -1) or the chosen
** member of the set. Every sequence of the set is compared with the
** reference through infoalign_Compare() and one row per sequence is
** written to the output file, as an HTML table when "html" is set and
** as plain text otherwise. An optional header row names the enabled
** columns.
*/
int main(int argc, char **argv)
{
    AjPSeqset seqset = NULL;
    AjPStr refseq;		/* name or number of the reference sequence */
    ajint  nrefseq;		/* resolved reference index, -1 = consensus */
    AjPMatrix matrix;		/* scoring matrix object */
    ajint **sub;		/* integer scoring matrix */
    AjPSeqCvt cvt = 0;		/* conversion table of the scoring matrix */
    float identity;
    ajint ident;
    float fplural;
    AjPStr cons;
    AjPSeq consensus;
    AjPStr consname = NULL;	/* name given to the consensus sequence */

    const AjPSeq ref;
    const AjPSeq seq;
    ajuint row;
    ajuint nseqs;

    AjBool html;
    AjBool doheader;
    AjBool dousa;
    AjBool doname;
    AjBool doseqlength;
    AjBool doalignlength;
    AjBool dogaps;
    AjBool dogapcount;
    AjBool doidcount;
    AjBool dosimcount;
    AjBool dodifcount;
    AjBool dochange;
    AjBool dodesc;
    AjBool dowt;

    /* "is any enabled column printed after this one?" flags */
    AjBool tail_usa;
    AjBool tail_name;
    AjBool tail_seqlen;
    AjBool tail_alignlen;
    AjBool tail_gaps;
    AjBool tail_gapcount;
    AjBool tail_id;
    AjBool tail_sim;
    AjBool tail_dif;
    AjBool tail_change;

    ajint  seqlength;
    ajint  alignlength;
    ajint  gaps;
    ajint  gapcount;
    ajint  idcount;
    ajint  simcount;
    ajint  difcount;
    float  change;

    AjPFile outfile;

    const AjPStr usa;
    const AjPStr name;
    AjPStr altusa;		/* shown when the real USA is empty */
    AjPStr altname;		/* shown when the real name is empty */

    embInit("infoalign", argc, argv);

    seqset  = ajAcdGetSeqset("sequence");
    refseq  = ajAcdGetString("refseq");
    matrix  = ajAcdGetMatrix("matrix");

    ajSeqsetFill(seqset);

    outfile = ajAcdGetOutfile("outfile");

    html          = ajAcdGetBoolean("html");
    doheader      = ajAcdGetBoolean("heading");
    dousa         = ajAcdGetBoolean("usa");
    doname        = ajAcdGetBoolean("name");
    doseqlength   = ajAcdGetBoolean("seqlength");
    doalignlength = ajAcdGetBoolean("alignlength");
    dogaps        = ajAcdGetBoolean("gaps");
    dogapcount    = ajAcdGetBoolean("gapcount");
    doidcount     = ajAcdGetBoolean("idcount");
    dosimcount    = ajAcdGetBoolean("simcount");
    dodifcount    = ajAcdGetBoolean("diffcount");
    dochange      = ajAcdGetBoolean("change");
    dodesc        = ajAcdGetBoolean("description");
    dowt          = ajAcdGetBoolean("weight");

    /*
    ** Column order is: usa, name, seqlength, alignlength, gaps, gapcount,
    ** idcount, simcount, difcount, change, weight, description.
    ** Each flag is built from the one for the following column.
    */
    tail_change   = (dodesc || dowt);
    tail_dif      = (tail_change || dochange);
    tail_sim      = (tail_dif || dodifcount);
    tail_id       = (tail_sim || dosimcount);
    tail_gapcount = (tail_id || doidcount);
    tail_gaps     = (tail_gapcount || dogapcount);
    tail_alignlen = (tail_gaps || dogaps);
    tail_seqlen   = (tail_alignlen || doalignlength);
    tail_name     = (tail_seqlen || doseqlength);
    tail_usa      = (tail_name || doname);

    /* consensus parameters */
    fplural   = ajAcdGetFloat("plurality");
    identity  = ajAcdGetFloat("identity");

    cons      = ajStrNew();
    consensus = ajSeqNew();
    altusa    = ajStrNewC("-");
    altname   = ajStrNewC("-");

    /* conversion table and scoring matrix */
    cvt = ajMatrixGetCvt(matrix);
    sub = ajMatrixGetMatrix(matrix);

    /* index of the reference sequence (-1 selects the consensus) */
    nrefseq = infoalign_Getrefseq(refseq, seqset);

    /* turn the % plurality into a fraction of the absolute total weight */
    fplural = ajSeqsetGetTotweight(seqset) * fplural / 100;

    /*
    ** turn the % identity into the number of identical sequences at a
    ** position required for consensus
    */
    ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100;

    /* build the consensus sequence and give it a name */
    embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset),
                fplural, 0.0, ident, ajFalse, &cons);
    ajSeqAssignSeqS(consensus, cons);

    consname = ajStrNewC("Consensus");
    ajSeqAssignNameS(consensus, consname);

    /* choose the reference sequence */
    ref = (nrefseq == -1) ? consensus : ajSeqsetGetseqSeq(seqset, nrefseq);

    /* open the HTML table */
    if(html)
    {
        ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor="
                    "\"#FFFFF0\">\n");
    }

    /* header row: one title per enabled column */
    if(doheader)
    {
        if(html)
        {
            ajFmtPrintF(outfile, "<tr>");

            if(dousa)
                ajFmtPrintF(outfile, "<th>USA</th>");
            if(doname)
                ajFmtPrintF(outfile, "<th>Name</th>");
            if(doseqlength)
                ajFmtPrintF(outfile, "<th>Sequence Length</th>");
            if(doalignlength)
                ajFmtPrintF(outfile, "<th>Aligned Length</th>");
            if(dogaps)
                ajFmtPrintF(outfile, "<th>Gaps</th>");
            if(dogapcount)
                ajFmtPrintF(outfile, "<th>Gap Length</th>");
            if(doidcount)
                ajFmtPrintF(outfile, "<th>Identity</th>");
            if(dosimcount)
                ajFmtPrintF(outfile, "<th>Similarity</th>");
            if(dodifcount)
                ajFmtPrintF(outfile, "<th>Difference</th>");
            if(dochange)
                ajFmtPrintF(outfile, "<th>%% Change</th>");
            if(dowt)
                ajFmtPrintF(outfile, "<th>Weight</th>");
            if(dodesc)
                ajFmtPrintF(outfile, "<th>Description</th>");

            ajFmtPrintF(outfile, "</tr>\n");
        }
        else
        {
            ajFmtPrintF(outfile, "%s", "# ");

            if(dousa)
                ajFmtPrintF(outfile, "%-16s", "USA");
            if(doname)
                ajFmtPrintF(outfile, "%-12s", "Name");
            if(doseqlength)
                ajFmtPrintF(outfile, "SeqLen\t");
            if(doalignlength)
                ajFmtPrintF(outfile, "AlignLen\t");
            if(dogaps)
                ajFmtPrintF(outfile, "Gaps\t");
            if(dogapcount)
                ajFmtPrintF(outfile, "GapLen\t");
            if(doidcount)
                ajFmtPrintF(outfile, "Ident\t");
            if(dosimcount)
                ajFmtPrintF(outfile, "Similar\t");
            if(dodifcount)
                ajFmtPrintF(outfile, "Differ\t");
            if(dochange)
                ajFmtPrintF(outfile, "%% Change\t");
            if(dowt)
                ajFmtPrintF(outfile, "Weight\t");
            if(dodesc)
                ajFmtPrintF(outfile, "Description");

            ajFmtPrintF(outfile, "\n");
        }
    }

    /* one output row per sequence of the set */
    nseqs = ajSeqsetGetSize(seqset);

    for(row = 0; row < nseqs; row++)
    {
        seq = ajSeqsetGetseqSeq(seqset, row);

        /* USA, or '-' when it is empty */
        usa = ajSeqGetUsaS(seq);
        if(!ajStrGetLen(usa))
            usa = altusa;

        /* name, or '-' when it is empty */
        name = ajSeqGetNameS(seq);
        if(!ajStrGetLen(name))
            name = altname;

        /* statistics of this sequence against the reference */
        infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength,
                          &gaps, &gapcount, &idcount, &simcount,
                          &difcount, &change);

        /* start table line */
        if(html)
            ajFmtPrintF(outfile, "<tr>");

        if(dousa)
            infoalign_OutputStr(outfile, usa, html, tail_usa, 18);

        if(doname)
            infoalign_OutputStr(outfile, name, html, tail_name, 14);

        if(doseqlength)
            infoalign_OutputInt(outfile, seqlength, html, tail_seqlen);

        if(doalignlength)
            infoalign_OutputInt(outfile, alignlength, html, tail_alignlen);

        if(dogaps)
            infoalign_OutputInt(outfile, gaps, html, tail_gaps);

        if(dogapcount)
            infoalign_OutputInt(outfile, gapcount, html, tail_gapcount);

        if(doidcount)
            infoalign_OutputInt(outfile, idcount, html, tail_id);

        if(dosimcount)
            infoalign_OutputInt(outfile, simcount, html, tail_sim);

        if(dodifcount)
            infoalign_OutputInt(outfile, difcount, html, tail_dif);

        if(dochange)
            infoalign_OutputFloat(outfile, change, html, tail_change);

        if(dowt)
            infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,row),
                                  html, dodesc);

        /* description is always the last column */
        if(dodesc)
            infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse,
                                NOLIMIT);

        /* end table line */
        if(html)
            ajFmtPrintF(outfile, "</tr>\n");
        else
            ajFmtPrintF(outfile, "\n");
    }

    /* close the HTML table */
    if(html)
    {
        ajFmtPrintF(outfile, "</table>\n");
    }

    ajFileClose(&outfile);

    /* tidy up */
    ajStrDel(&altusa);
    ajStrDel(&altname);
    ajStrDel(&consname);
    ajSeqDel(&consensus);

    ajSeqsetDel(&seqset);
    ajStrDel(&refseq);
    ajMatrixDel(&matrix);
    ajStrDel(&cons);

    embExit();
    return 0;
}
コード例 #9
0
int main(int argc, char **argv)
{
    ajint i;
    ajint numseq;
    ajint j = 0;
    ajint numres;
    ajint count;
    ajint k;
    ajint kmax;
    float defheight;
    float currentscale;
    AjPStr shade = NULL;
    AjPFloat pair  = NULL;
    AjPGraph graph = NULL;
    AjPMatrix cmpmatrix = NULL;
    AjPSeqCvt cvt = NULL;
    AjPStr matcodes = NULL;
    AjBool consensus;
    AjBool colourbyconsensus;
    AjBool colourbyresidues;
    AjBool colourbyshade = AJFALSE;
    AjBool boxit;
    AjBool boxcol;
    AjBool portrait;
    AjBool collision;
    ajint identity;
    AjBool listoptions;
    ajint alternative;
    AjPStr altstr = NULL;
    AjPStr sidentity = NULL;
    AjPStr ssimilarity = NULL;
    AjPStr sother = NULL;
    AjPStr sboxcolval = NULL;
    AjPStr options = NULL;
    /*    ajint showscore = 0; */
    ajint iboxcolval = 0;
    ajint cidentity = RED;
    ajint csimilarity = GREEN;
    ajint cother = BLACK;
    float fxp;
    float fyp;
    float yincr;
    float y;
    ajint ixlen;
    ajint iylen;
    ajint ixoff;
    ajint iyoff;
    char res[2] = " ";

    float *score = 0;
    float scoremax = 0;

    float *identical = NULL;
    ajint identicalmaxindex;
    float *matching = NULL;
    ajint matchingmaxindex;

    float *colcheck = NULL;

    ajint **matrix;
    ajint m1 = 0;
    ajint m2 = 0;
    ajint ms = 0;
    ajint highindex = 0;
    ajint myindex;
    ajint *previous = 0;
    AjBool iscons = ajFalse;
    ajint currentstate = 0;
    ajint oldfg = 0;
    float fold = 0.0;
    ajint *colmat = 0;
    ajint *shadecolour = 0;
    /* float identthresh = 1.5; */
    /* float simthresh = 1.0; */
    /* float relthresh = 0.5; */
    float part = 0.0;
    const char *cptr;
    ajint resbreak;
    float fplural;
    float ystart;
    float xmin;
    float xmax;
    float xmid;
    AjPTime ajtime;
    ajint gapcount = 0;
    ajint countforgap = 0;
    ajint boxindex;
    float max;
    ajint matsize;
    ajint seqperpage = 0;
    ajint startseq;
    ajint endseq;
    ajint newILend = 0;
    ajint newILstart;
    void *freeptr;
    ajint itmp;
    
    embInit("prettyplot", argc, argv);

    seqset   = ajAcdGetSeqset("sequences");
    numres   = ajAcdGetInt("residuesperline");
    resbreak = ajAcdGetInt("resbreak");

    ajSeqsetFill(seqset);	/* Pads sequence set with gap characters */
    numseq = ajSeqsetGetSize(seqset);

    graph             = ajAcdGetGraph("graph");
    colourbyconsensus = ajAcdGetBoolean("ccolours");
    colourbyresidues  = ajAcdGetBoolean("docolour");
    shade             = ajAcdGetString("shade");
    pair              = ajAcdGetArray("pair");
    identity          = ajAcdGetInt("identity");
    boxit             = ajAcdGetBoolean("box");

    ajtime = ajTimeNewTodayFmt("daytime");

    ajSeqsetTrim(seqset);
    /* offset = ajSeqsetGetOffset(seqset); Unused */

    ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset));

    if(boxit)
    {
	AJCNEW(seqboxptr, numseq);
	for(i=0;i<numseq;i++)
	    AJCNEW(seqboxptr[i], ajSeqsetGetLen(seqset));
    }
    boxcol      = ajAcdGetBoolean("boxcol");
    sboxcolval  = ajAcdGetString("boxuse");

    if(boxcol)
    {
	iboxcolval = ajGraphicsCheckColourS(sboxcolval);
	if(iboxcolval == -1)
	    iboxcolval = GREY;
    }

    consensus = ajAcdGetBoolean("consensus");
    if(consensus)
    {
	AJCNEW(constr, ajSeqsetGetLen(seqset)+1);
	constr[0] = '\0';
    }
    shownames   = ajAcdGetBoolean("name");
    shownumbers = ajAcdGetBoolean("number");
    charlen     = ajAcdGetInt("maxnamelen");
    fplural     = ajAcdGetFloat("plurality");
    portrait    = ajAcdGetBoolean("portrait");
    collision   = ajAcdGetBoolean("collision");
    listoptions = ajAcdGetBoolean("listoptions");
    altstr = ajAcdGetListSingle("alternative");
    cmpmatrix   = ajAcdGetMatrix("matrixfile");

    ajStrToInt(altstr, &alternative);

    matrix = ajMatrixGetMatrix(cmpmatrix);
    cvt = ajMatrixGetCvt(cmpmatrix);
    matsize = ajMatrixGetSize(cmpmatrix);

    AJCNEW(identical,matsize);
    AJCNEW(matching,matsize);
    AJCNEW(colcheck,matsize);

    numgaps = numres/resbreak;
    numgaps--;

    if(portrait)
    {
	ajGraphicsSetPortrait(1);
	ystart = (float) 75.0;
    }
    else
	ystart = (float) 75.0;

    /* pair is an array of three non-negative floats */

    /* identthresh = ajFloatGet(pair,0); Unused */
    /* simthresh = ajFloatGet(pair,1); Unused */
    /* relthresh = ajFloatGet(pair,2); Unused */

    /*
    ** shade is a formatted 4-character string. Characters BLPW only.
    ** controlled by a pattern in ACD.
    */

    if(ajStrGetLen(shade))
    {
	AJCNEW(shadecolour,4);
	cptr = ajStrGetPtr(shade);
	for(i=0;i<4;i++){
	    if(cptr[i]== 'B' || cptr[i]== 'b')
		shadecolour[i] = BLACK;
	    else if(cptr[i]== 'L' || cptr[i]== 'l')
		shadecolour[i] = BROWN;
	    else if(cptr[i]== 'P' || cptr[i]== 'p')
		shadecolour[i] = WHEAT;
	    else if(cptr[i]== 'W' || cptr[i]== 'w')
		shadecolour[i] = WHITE;
	}

	colourbyconsensus = colourbyresidues = ajFalse;
	colourbyshade = ajTrue;
    }

/*
** we can colour by consensus or residue but not both
** if we have to choose, use the consensus
*/

    if(colourbyconsensus && colourbyresidues)
	colourbyconsensus = AJFALSE;

    sidentity = ajAcdGetString("cidentity");
    ssimilarity = ajAcdGetString("csimilarity");
    sother = ajAcdGetString("cother");

    if(colourbyconsensus)
    {
	cidentity = ajGraphicsCheckColourS(sidentity);
	if(cidentity == -1)
	    cidentity = RED;

	csimilarity = ajGraphicsCheckColourS(ssimilarity);
	if(csimilarity == -1)
	    csimilarity = GREEN;


	cother = ajGraphicsCheckColourS(sother);
	if(cother == -1)
	    cother = BLACK;

    }
    else if(colourbyresidues)
    {
	matcodes = ajMatrixGetCodes(cmpmatrix);
	if(ajSeqsetIsProt(seqset))
	    colmat = ajGraphicsBasecolourNewProt(matcodes);
	else
	    colmat = ajGraphicsBasecolourNewNuc(matcodes);
    }


    /* output the options used as the subtitle for the bottom of the graph */
    if(listoptions)
    {
	ajStrAssignC(&options,"");
	ajFmtPrintAppS(&options,"-plurality %.1f",fplural);

	if(collision)
	    ajStrAppendC(&options," -collision");
	else
	    ajStrAppendC(&options," -nocollision");

	if(boxit)
	    ajStrAppendC(&options," -box");
	else
	    ajStrAppendC(&options," -nobox");

	if(boxcol)
	    ajStrAppendC(&options," -boxcol");
	else
	    ajStrAppendC(&options," -noboxcol");

	if(colourbyconsensus)
	    ajStrAppendC(&options," -colbyconsensus");
	else if(colourbyresidues)
	    ajStrAppendC(&options," -colbyresidues");
	else if(colourbyshade)
	    ajStrAppendC(&options," -colbyshade");
	else
	    ajStrAppendC(&options," -nocolour");

	if(alternative==2)
	    ajStrAppendC(&options," -alt 2");
	else if(alternative==1)
	    ajStrAppendC(&options," -alt 1");
	else if(alternative==3)
	    ajStrAppendC(&options," -alt 3");
    }


    AJCNEW(seqcolptr, numseq);
    for(i=0;i<numseq;i++)
	AJCNEW(seqcolptr[i], ajSeqsetGetLen(seqset));

    AJCNEW(seqcharptr, numseq);
    AJCNEW(seqnames, numseq);
    AJCNEW(score, numseq);
    AJCNEW(previous, numseq);
    AJCNEW(seqcount, numseq);

    for(i=0;i<numseq;i++)
    {
	ajSeqsetFmtUpper(seqset);
	seqcharptr[i] =  ajSeqsetGetseqSeqC(seqset, i);
	seqnames[i] = 0;
	ajStrAppendS(&seqnames[i],ajSeqsetGetseqNameS(seqset, i));
	ajStrTruncateLen(&seqnames[i],charlen);
	previous[i] = 0;
	seqcount[i] = 0;
    }

    /*
    ** user will pass the number of residues to fit a page
    ** therefore we now need to calculate the size of the chars
    ** based on this and get the new char width.
    ** 'charlen' maximum characters for the name (truncated above)
    */

    ajGraphicsGetCharsize(&defheight,&currentscale);

    xmin = -charlen - (float)2.0;
    xmax = (float)numres+(float)11.0+(float)(numres/resbreak);
    xmid = (xmax + xmin)/(float)2.0;

    ajGraphOpenWin(graph, xmin, xmax,
		   (float)0.0, ystart+(float)1.0);
 
    ajGraphGetParamsPage(graph, &fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff);

    if(portrait)
    {
        itmp = ixlen;
        ixlen = iylen;
        iylen = itmp;
    }

    ajGraphicsGetCharsize(&defheight,&currentscale);

    ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen+1)*
                                          (currentscale * (float) 1.5)))/
                                           currentscale);

/*    ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen)*
                                          (currentscale+(float)1.0)))/
                                          currentscale); */

    ajGraphicsGetCharsize(&defheight,&currentscale);

    yincr = (currentscale + (float)3.0)*(float)0.3;

/*
** If we have titles (now the standard graph title and subtitle and footer)
** leave 7 rows of space for them
*/
    y=ystart-(float)7.0;

    if(ajStrGetLen(options))
    {
	fold = ajGraphicsSetCharscale(1.0);
	ajGraphicsDrawposTextAtmid(xmid,2.0,
                                   ajStrGetPtr(options));
	ajGraphicsSetCharscale(fold);
    }

/* if sequences per page not set then calculate it */

    if(!seqperpage)
    {
	seqperpage = prettyplot_calcseqperpage(yincr,y,consensus);
	if(seqperpage>numseq)
	    seqperpage=numseq;
    }

    count = 0;

/*
** for boxes we need to set a foreground colour for the box lines
** and save the current foreground colour
*/
    if(boxit && boxcol)
	oldfg = ajGraphicsSetFgcolour(iboxcolval);

/*
** step through each residue position
*/

    kmax = ajSeqsetGetLen(seqset) - 1;
    for(k=0; k<= kmax; k++)
    {
	/* reset column score array */
	for(i=0;i<numseq;i++)
	    score[i] = 0.0;

	/* reset matrix character testing arrays */
	for(i=0;i<matsize;i++)
	{
	    identical[i] = 0.0;
	    matching[i] = 0.0;
	    colcheck[i] = 0.0;
	}

	/* generate a score for this residue in each sequence */
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
	    for(j=0;j<numseq;j++)
	    {
		m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
		if(m1 && m2)
		    score[i] += (float)matrix[m1][m2]*
			ajSeqsetGetseqWeight(seqset, j);
	    }
	    if(m1)
		identical[m1] += ajSeqsetGetseqWeight(seqset, i);
	}

	/* find the highest score */
	highindex = -1;
	scoremax  = INT_MIN;
	/*ajDebug("Scores at position %d:\n", k);*/

	for(i=0;i<numseq;i++)
	{
	    /*ajDebug("  seq %d: '%c' %f\n",i,seqcharptr[i][k],score[i]);*/

	    if(score[i] > scoremax)
	    {
		scoremax = score[i];
		highindex = i;
	    }
	}
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);

	    if(!matching[m1])
	    {
		for(j=0;j<numseq;j++)
		{
		    m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
		    if(m1 && m2 && matrix[m1][m2] > 0)
			matching[m1] += ajSeqsetGetseqWeight(seqset, j);
		}
	    }
	}

	/* find highs for matching and identical */
	matchingmaxindex  = 0;
	identicalmaxindex = 0;
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
	    if(identical[m1] > identical[identicalmaxindex])
		identicalmaxindex = m1;
	}
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
	    if(matching[m1] > matching[matchingmaxindex])
		matchingmaxindex = m1;
	    else if(matching[m1] ==  matching[matchingmaxindex])
	    {
		if(identical[m1] > identical[matchingmaxindex])
		    matchingmaxindex= m1;
	    }
	}

	iscons = ajFalse;
	boxindex = -1;
	max = -3;

	ajDebug("k:%2d highindex:%2d matching:%4.2f\n",
		k, highindex,
		matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]);
	if(highindex != -1 &&
	   matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural)
	{
	    iscons = ajTrue;
	    boxindex = highindex;
	}
	else
	{
	    for(i=0;i<numseq;i++)
	    {
		m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
		if(matching[m1] > max)
		{
		    max = matching[m1];
		    highindex = i;
		}
		else if(matching[m1] == max)
		{
		    if(identical[m1] >
		       identical[ajSeqcvtGetCodeK(cvt,
                                                  seqcharptr[highindex][k])] )
		    {
			max = matching[m1];
			highindex = i;
		    }
		}
	    }

	    if(matching[ajSeqcvtGetCodeK(cvt,
                                         seqcharptr[highindex][k])] >= fplural)
	    {
		iscons = ajTrue;
		boxindex = highindex;
	    }
	}


	if(iscons)
	{
	    if(!collision)
	    {
		/* check for collisions */
		if(alternative == 1)
		{
		    /* check to see if this is unique for collisions */
		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			if(identical[m1] >= identical[identicalmaxindex] &&
			   m1 != identicalmaxindex)
			    iscons = ajFalse;
		    }

		    /*ajDebug("after (alt=1) iscons: %B",iscons);*/
		}

		else if(alternative == 2)
		{
		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);

			if((matching[m1] >= matching[matchingmaxindex] &&
			    m1 != matchingmaxindex &&
			    matrix[m1][matchingmaxindex] < 0.1)||
			   (identical[m1] >= identical[matchingmaxindex]
			   && m1 != matchingmaxindex))
			    iscons = ajFalse;
		    }
		}
		else if(alternative == 3)
		{
		    /*
		    ** to do this check one is NOT in consensus to see if
		    ** another score of fplural has been found
		    */
		    ms = ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k]);

		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			if(ms != m1 && colcheck[m1] == 0.0)
			    /* NOT in the current consensus */
			    for(j=0;j<numseq;j++)
			    {
				m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
				if( matrix[ms][m2] < 0.1)
				{
				    /* NOT in the current consensus */
				    if( matrix[m1][m2] > 0.1)
					colcheck[m1] +=
                                            ajSeqsetGetseqWeight(seqset,
                                                                 j);
				}
			    }
		    }

		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			/* if any other matches then we have a collision */
			if(colcheck[m1] >= fplural)
			    iscons = ajFalse;
		    }

		    /*ajDebug("after alt=2 iscons: %B", iscons);*/
		}
		else
		{
		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			if((matching[m1] >= matching[matchingmaxindex] &&
			    m1 != matchingmaxindex &&
			    matrix[m1][matchingmaxindex] < 0.1))
			    iscons = ajFalse;
			if(identical[m1] >= identical[matchingmaxindex] &&
			   m1 != matchingmaxindex &&
			   matrix[m1][matchingmaxindex] > 0.1)
			    iscons = ajFalse;
		    }

		    if(!iscons)
		    {	/* matches failed try identicals */
			if(identical[identicalmaxindex] >= fplural)
			{
			    iscons = ajTrue;
			    /*
			    ** if nothing has an equal or higher match that
			    ** does not match highest then false
			    */
			    for(i=0;i<numseq;i++)
			    {
				m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
				if(identical[m1] >=
				   identical[identicalmaxindex] &&
				   m1 != identicalmaxindex)
				    iscons = ajFalse;
				else if(matching[m1] >=
					matching[identicalmaxindex] &&
					matrix[m1][matchingmaxindex] <= 0.0)
				    iscons = ajFalse;
				else if(m1 == identicalmaxindex)
				    j = i;
			    }

			    if(iscons)
				highindex = j;
			}
		    }

		}
	    }

	    if(identity)
	    {
		j = 0;
		for(i=0;i<numseq;i++)
		    if(seqcharptr[highindex][k] == seqcharptr[i][k])
			j++;

		if(j<identity)
		    iscons = ajFalse;
	    }
	}

	/*
	** Done a full line of residues
	** Boxes have been defined up to this point
	*/
	if(count >= numres )
	{
	    /* check y position for next set */
	    y=y-(yincr*((float)numseq+(float)2.0+((float)consensus*(float)2)));
	    if(y<yincr*((float)numseq+(float)2.0+((float)consensus*(float)2)))
	    {
		/* full page - print it */
		y=ystart-(float)6.0;

		startseq = 0;
		endseq = seqperpage;
		newILstart = newILend;
		newILend = k;
		while(startseq < numseq)
		{
		    /* AJB */
		    /*if(startseq != 0)
		    	ajGraphNewpage(graph, AJFALSE);*/

		    /*ajDebug("Inner loop: startseq: %d numseq: %d endseq: %d\n",
			    startseq, numseq, endseq);*/
		    if(endseq>numseq)
			endseq=numseq;
		    prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset),
					   startseq,endseq,
					   newILstart,newILend,
					   numres,resbreak,
					   boxit,boxcol,consensus,
					   ystart,yincr,cvt);
		    startseq = endseq;
		    endseq += seqperpage;
		    ajGraphNewpage(graph, AJFALSE);
		}
	    }

	    count = 0;
	    gapcount = 0;
	}

	count++;
	countforgap++;

	for(j=0;j<numseq;j++)
	{
	    /* START OF BOXES */

	    if(boxit)
	    {
		seqboxptr[j][k] = 0;
		if(boxindex!=-1)
		{
		    myindex = boxindex;
		    if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		       [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0)
			part = 1.0;
		    else
		    {
			if(identical[ajSeqcvtGetCodeK(cvt,
                                                      seqcharptr[j][k])] >=
			   fplural)
			    part = 1.0;
			else
			    part = 0.0;
		    }

		    if(previous[j] != part)
			/* draw vertical line */
			seqboxptr[j][k] |= BOXLEF;

		    if(j==0)
		    {
			/* special case for horizontal line */
			if(part)
			{
			    currentstate = 1;
			    /* draw hori line */
			    seqboxptr[j][k] |= BOXTOP;
			}
			else
			    currentstate = 0;
		    }
		    else
		    {
			/* j != 0  Normal case for horizontal line */
			if(part != currentstate)
			{
			    /*draw hori line */
			    seqboxptr[j][k] |= BOXTOP;
			    currentstate = (ajint) part;
			}
		    }

		    if(j== numseq-1 && currentstate)
			/* draw horiline at bottom */
			seqboxptr[j][k] |= BOXBOT;

		    previous[j] = (ajint) part;
		}
		else
		{
		    part = 0;
		    if(previous[j])
		    {
			/* draw vertical line */
			seqboxptr[j][k] |= BOXLEF;
		    }
		    previous[j] = 0;
		}

		if(count == numres || k == kmax || countforgap >= resbreak )
		{			/* last one on the row or a break*/
		    if(previous[j])
		    {
			/* draw vertical line */
			seqboxptr[j][k] |= BOXRIG;
		    }
		    previous[j] = 0;
		}

	    } /* end box */

	    if(boxit && boxcol)
		if(boxindex != -1)
		{
		    myindex = boxindex;
		    if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		       [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0
		       || identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >=
                       fplural )

			seqboxptr[j][k] |= BOXCOLOURED;
		}

	    /* END OF BOXES */




	    if(ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]))
		res[0] = seqcharptr[j][k];
	    else
		res[0] = '-';

	    if(colourbyconsensus)
	    {
		part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		    [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])];
		if(iscons && seqcharptr[highindex][k] == seqcharptr[j][k])
		    seqcolptr[j][k] = cidentity;
		else if(part > 0.0)
		    seqcolptr[j][k] = csimilarity;
		else
		    seqcolptr[j][k] = cother;
	    }
	    else if(colourbyresidues)
		seqcolptr[j][k] = colmat[ajSeqcvtGetCodeK(cvt,
                                                          seqcharptr[j][k])];
	    else if(iscons && colourbyshade)
	    {
		part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		    [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])];
		if(part >= 1.5)
		    seqcolptr[j][k] = shadecolour[0];
		else if(part >= 1.0)
		    seqcolptr[j][k] = shadecolour[1];
		else if(part >= 0.5)
		    seqcolptr[j][k] = shadecolour[2];
		else
		    seqcolptr[j][k] = shadecolour[3];
	    }
	    else if(colourbyshade)
		seqcolptr[j][k] = shadecolour[3];
	    else
		seqcolptr[j][k] = BLACK;
	}

	if(consensus)
	{
	    if(iscons)
		res[0] = seqcharptr[highindex][k];
	    else
		res[0] = '-';
	    strcat(constr,res);
	}

	if(countforgap >= resbreak)
	{
	    gapcount++;
	    countforgap=0;
	}
    }


    startseq = 0;
    endseq=seqperpage;
    newILstart = newILend;
    newILend = k;
    while(startseq < numseq)
    {
	if(startseq)
	    ajGraphNewpage(graph, AJFALSE);

	/*ajDebug("Final loop: startseq: %d numseq: %d endseq: %d\n",
		startseq, numseq, endseq);*/
	if(endseq>numseq)
	    endseq = numseq;
	prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset),
			       startseq,endseq,
			       newILstart,newILend,
			       numres,resbreak,
			       boxit,boxcol,consensus,
			       ystart,yincr,cvt);
	startseq = endseq;
	endseq += seqperpage;
    }


    ajGraphicsGetCharsize(&defheight,&currentscale);

    if(boxit && boxcol)
	oldfg = ajGraphicsSetFgcolour(oldfg);

    ajGraphicsCloseWin();
    ajGraphxyDel(&graph);

    ajStrDel(&sidentity);
    ajStrDel(&ssimilarity);
    ajStrDel(&sother);
    ajStrDel(&options);
    ajStrDel(&altstr);

    ajStrDel(&matcodes);

    for(i=0;i<numseq;i++)
    {
	ajStrDel(&seqnames[i]);
	AJFREE(seqcolptr[i]);
	if(seqboxptr)
            AJFREE(seqboxptr[i]);
    }
    AJFREE(seqcolptr);
    AJFREE(seqboxptr);

    AJFREE(seqnames);
    AJFREE(score);
    AJFREE(previous);
    AJFREE(seqcount);

    AJFREE(colmat);
    AJFREE(shadecolour);

    freeptr = (void *) seqcharptr;
    AJFREE(freeptr);

    AJFREE(identical);
    AJFREE(matching);
    AJFREE(colcheck);

    ajSeqsetDel(&seqset);
    ajMatrixDel(&cmpmatrix);
    ajStrDel(&shade);
    ajStrDel(&sboxcolval);
    ajStrDel(&sidentity);
    ajStrDel(&ssimilarity);
    ajStrDel(&sother);
    ajFloatDel(&pair);
    ajTimeDel(&ajtime);
    AJFREE(constr);

    embExit();

    return 0;
}
コード例 #10
0
ファイル: polydot.c プロジェクト: WenchaoLin/JAMg
int main(int argc, char **argv)
{

    AjPSeqset seqset;
    const AjPSeq seq1;
    const AjPSeq seq2;
    ajint wordlen;
    AjPTable seq1MatchTable = NULL;
    AjPList matchlist ;
    AjPGraph graph = 0;
    ajuint i;
    ajuint j;
    float total=0;
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,1500,10000,50000,
	100000,500000,1000000,5000000
    };
    ajint numbofticks = 10;
    ajint gap,tickgap;
    AjBool boxit    = AJTRUE;
    AjBool dumpfeat = AJFALSE;
    float xmargin;
    float ymargin;
    float k;
    char ptr[10];
    float ticklen;
    float onefifth;
    AjPFeattable *tabptr = NULL;
    AjPFeattabOut seq1out = NULL;
    AjPStr sajb = NULL;
    float flen1;
    float flen2;
    ajuint tui;
    
    embInit("polydot", argc, argv);

    wordlen  = ajAcdGetInt("wordsize");
    seqset   = ajAcdGetSeqset("sequences");
    graph    = ajAcdGetGraph("graph");
    gap      = ajAcdGetInt("gap");
    boxit    = ajAcdGetBoolean("boxit");
    seq1out  = ajAcdGetFeatout("outfeat");
    dumpfeat = ajAcdGetToggle("dumpfeat");

    sajb = ajStrNew();
    embWordLength(wordlen);

    AJCNEW(lines,ajSeqsetGetSize(seqset));
    AJCNEW(pts,ajSeqsetGetSize(seqset));
    AJCNEW(tabptr,ajSeqsetGetSize(seqset));

    for(i=0;i<ajSeqsetGetSize(seqset);i++)
    {
	seq1 = ajSeqsetGetseqSeq(seqset, i);
	total += ajSeqGetLen(seq1);

    }
    
    total +=(float)(gap*(ajSeqsetGetSize(seqset)-1));
    
    xmargin = total*(float)0.15;
    ymargin = total*(float)0.15;
    
    ticklen = xmargin*(float)0.1;
    onefifth  = xmargin*(float)0.2;
    
    i = 0;
    while(acceptableticks[i]*numbofticks < ajSeqsetGetLen(seqset))
	i++;
    
    if(i<=13)
	tickgap = acceptableticks[i];
    else
	tickgap = acceptableticks[13];
    
    ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset));

    ajGraphOpenWin(graph, (float)0.0-xmargin,(total+xmargin)*(float)1.35,
		   (float)0.0-ymargin,
		   total+ymargin);
    ajGraphicsSetCharscale((float)0.3);
    
    
    for(i=0;i<ajSeqsetGetSize(seqset);i++)
    {
	which = i;
	seq1 = ajSeqsetGetseqSeq(seqset, i);
	tui = ajSeqGetLen(seq1);
	flen1 = (float) tui;

	if(embWordGetTable(&seq1MatchTable, seq1)){ /* get table of words */
	    for(j=0;j<ajSeqsetGetSize(seqset);j++)
	    {
		seq2 = ajSeqsetGetseqSeq(seqset, j);
		tui  = ajSeqGetLen(seq2);
		flen2 = (float) tui;

		if(boxit)
		    ajGraphicsDrawposRect(xstart,ystart,
                                          xstart+flen1,
                                          ystart+flen2);

		matchlist = embWordBuildMatchTable(seq1MatchTable, seq2,
						   ajTrue);
		if(matchlist)
		    polydot_plotMatches(matchlist);

		if(i<j && dumpfeat)
		    embWordMatchListConvToFeat(matchlist,&tabptr[i],
					       &tabptr[j],seq1,
					       seq2);

		if(matchlist)	       /* free the match structures */
		    embWordMatchListDelete(&matchlist);

		if(j==0)
		{
		    for(k=0.0;k<ajSeqGetLen(seq1);k+=tickgap)
		    {
			ajGraphicsDrawposLine(xstart+k,ystart,xstart+k,
				    ystart-ticklen);

			sprintf(ptr,"%d",(ajint)k);
			ajGraphicsDrawposTextAtmid(xstart+k,
                                                   ystart-(onefifth),
                                                   ptr);
		    }
		    ajGraphicsDrawposTextAtmid(
                        xstart+(flen1/(float)2.0),
                        ystart-(3*onefifth),
                        ajStrGetPtr(ajSeqsetGetseqNameS(seqset, i)));
		}

		if(i==0)
		{
		    for(k=0.0;k<ajSeqGetLen(seq2);k+=tickgap)
		    {
			ajGraphicsDrawposLine(xstart,ystart+k,xstart-ticklen,
				    ystart+k);

			sprintf(ptr,"%d",(ajint)k);
			ajGraphicsDrawposTextAtend(xstart-(onefifth),
                                                   ystart+k,
                                                   ptr);
		    }
		    ajGraphicsDrawposTextAtlineJustify(
                        xstart-(3*onefifth),
                        ystart+(flen2/(float)2.0),
                        xstart-(3*onefifth),ystart+flen2,
                        ajStrGetPtr(ajSeqsetGetseqNameS(seqset, j)),0.5);
		}
		ystart += flen2+(float)gap;
	    }
	}
	embWordFreeTable(&seq1MatchTable);
	seq1MatchTable = NULL;
	xstart += flen1+(float)gap;
	ystart = 0.0;
    }
    
    ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth),
		     "No. Length  Lines  Points Sequence");
    
    for(i=0;i<ajSeqsetGetSize(seqset);i++)
    {
	seq1 = ajSeqsetGetseqSeq(seqset, i);
	ajFmtPrintS(&sajb,"%3u %6d %5d %6d %s",i+1,
		    ajSeqGetLen(seq1),lines[i],
		    pts[i],ajSeqGetNameC(seq1));

	ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth*(i+2)),
                                     ajStrGetPtr(sajb));
    }
    
    if(dumpfeat && seq1out)
    {
	for(i=0;i<ajSeqsetGetSize(seqset);i++)
	{
	    ajFeattableWrite(seq1out, tabptr[i]);
	    ajFeattableDel(&tabptr[i]);
	}
    }
    
    ajGraphicsClose();
    ajGraphxyDel(&graph);

    ajStrDel(&sajb);
    AJFREE(lines);
    AJFREE(pts);
    AJFREE(tabptr);

    ajSeqsetDel(&seqset);
    ajFeattabOutDel(&seq1out);;

    embExit();

    return 0;
}
コード例 #11
0
/* @prog vrnaalifoldpf ********************************************************
**
** EMBOSS front end to the Vienna alifold routines for a set of aligned
** sequences.
**
** Reads the options, copies the aligned sequences and their names into
** NULL-terminated C arrays, then:
**   - runs alifold() (or circalifold() for circular input) to get the
**     consensus structure and its energy, and averages the per-sequence
**     energies of that structure;
**   - writes the consensus sequence, structure and energies to 'outfile';
**   - writes the structure plot (alignments up to 2500 columns) and,
**     when 'alignoutfile' is given, the coloured alignment;
**   - runs alipf_fold() (or alipf_circ_fold()) and writes the ensemble
**     energy, the alignment section and the coloured dot plot.
**
** Sets several Vienna library globals (temperature, noGU, dangles, ...).
**
** @param [r] argc [int] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] Always 0
******************************************************************************/
int main(int argc, char *argv[])
{
    char *string;
    char *structure=NULL;
    char *ns_bases=NULL;
    char *c;
    int  n_seq;
    int  i;
    int  length;
    int  sym;
    int  endgaps = 0;
    int  mis = 0;
    double min_en;
    double real_en;
    double sfact = 1.07;
    int  pf = 0;
    int  istty;
    char *AS[MAX_NUM_NAMES];    /* aligned sequences */
    char *names[MAX_NUM_NAMES]; /* sequence names */

    AjPSeqset  seq       = NULL;

    AjPFile confile   = NULL;
    AjPFile alifile   = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf      = NULL;
    AjPFile essfile   = NULL;
    AjPFile dotfile   = NULL;


    AjPStr constring = NULL;

    float eT = 0.;
    AjBool eGU;

    AjBool eclose;
    AjBool lonely;
    AjPStr ensbases = NULL;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    float escale = 0.;
    AjPStr edangles = NULL;
    char edangle = '\0';

    ajint len;

    AjPSeq tseq = NULL;
    AjPStr tname = NULL;

    int circ = 0;
    int doAlnPS = 0;
    int doColor = 0;


    embInitPV("vrnaalifoldpf",argc,argv,"VIENNA",VERSION);


    constring = ajStrNew();

    seq       = ajAcdGetSeqset("sequence");
    confile   = ajAcdGetInfile("constraintfile");
    paramfile = ajAcdGetInfile("paramfile");
    eT        = ajAcdGetFloat("temperature");
    eGU       = ajAcdGetBoolean("gu");
    eclose    = ajAcdGetBoolean("closegu");
    lonely    = ajAcdGetBoolean("lp");
    ensbases  = ajAcdGetString("nsbases");
    etloop    = ajAcdGetBoolean("tetraloop");
    eenergy   = ajAcdGetListSingle("energy");
    escale    = ajAcdGetFloat("scale");
    edangles  = ajAcdGetListSingle("dangles");
    mis       = !!ajAcdGetBoolean("most");
    endgaps   = !!ajAcdGetBoolean("endgaps");
    nc_fact   = (double) ajAcdGetFloat("nspenalty");
    cv_fact   = (double) ajAcdGetFloat("covariance");

    outf      = ajAcdGetOutfile("outfile");
    essfile   = ajAcdGetOutfile("ssoutfile");
    alifile   = ajAcdGetOutfile("alignoutfile");
    circ      = !!ajAcdGetBoolean("circular");
    doColor   = !!ajAcdGetBoolean("colour");

    dotfile   = ajAcdGetOutfile("dotoutfile");


    do_backtrack = 1;
    pf = 1;
    string = NULL;
    istty = 0;
    dangles = 2;

    /* the ACD booleans enable a feature; the library flags disable it */
    temperature   = (double) eT;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    ns_bases      = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL;
    tetra_loop    = !!etloop;

    ewt = *ajStrGetPtr(eenergy);
    if(ewt == '0')
        energy_set = 0;
    else if(ewt == '1')
        energy_set = 1;
    else if(ewt == '2')
        energy_set = 2;

    sfact = (double) escale;

    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
        dangles = 0;
    else if(edangle == '1')
        dangles = 1;
    else if(edangle == '2')
        dangles = 2;
    else if(edangle == '3')
        dangles = 3;


    if(paramfile)
        read_parameter_file(paramfile);

    /*
    ** Parse the comma-separated list of extra base pairs. A leading '-'
    ** makes every pair symmetric, so "XY" is also stored as "YX".
    */
    if (ns_bases != NULL)
    {
        /* room for 32 characters of pair data plus the terminator */
        nonstandards = space(33);
        c=ns_bases;
        i=sym=0;
        if (*c=='-')
        {
            sym=1;
            c++;
        }
        while (*c!='\0')
        {
            if (*c!=',')
            {
                /* a pair takes 2 slots, or 4 when its mirror is added */
                int need = (sym && c[1]!=c[0]) ? 4 : 2;

                /*
                ** Stop on a dangling single base (reading its partner
                ** would step over the string terminator) or when the
                ** pair would not fit into the buffer.
                */
                if (c[1]=='\0' || i+need > 32)
                {
                    ajWarn("Ignoring incomplete or excess entries in "
                           "nsbases '%S'", ensbases);
                    break;
                }

                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1)))
                {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
    }

    if(alifile)
        doAlnPS = 1;


    if(confile)
        vienna_GetConstraints(confile,&constring);

    n_seq = ajSeqsetGetSize(seq);

    /* one slot of AS[]/names[] is reserved for the NULL terminator */
    if(n_seq > MAX_NUM_NAMES - 1)
        ajFatal("[e]RNAalifold is restricted to %d sequences\n",
                MAX_NUM_NAMES - 1);

    if (n_seq==0)
        ajFatal("No sequences found");

    for(i=0;i<n_seq;++i)
    {
        tseq  = (AjPSeq) ajSeqsetGetseqSeq(seq,i);
        ajSeqGapStandard(tseq, '-');
        tname = (AjPStr) ajSeqsetGetseqNameS(seq,i);
        len   = ajSeqGetLen(tseq);
        AS[i] = (char *) space(len+1);
        names[i] = (char *) space(ajStrGetLen(tname)+1);
        strcpy(AS[i],ajSeqGetSeqC(tseq));
        strcpy(names[i],ajStrGetPtr(tname));
    }
    AS[n_seq] = NULL;
    names[n_seq] = NULL;

    if (endgaps)
        for (i=0; i<n_seq; i++)
            mark_endgaps(AS[i], '~');


    length = (int) strlen(AS[0]);
    structure = (char *) space((unsigned) length+1);
    if(confile)
    {
        fold_constrained = 1;
        /*
        ** The constraint comes from a user file and may be longer than
        ** the alignment: copy at most 'length' characters and terminate.
        */
        strncpy(structure,ajStrGetPtr(constring),(size_t) length);
        structure[length] = '\0';
    }



    if (circ && noLonelyPairs)
        ajWarn(
            "warning, depending on the origin of the circular sequence, "
            "some structures may be missed when using -noLP\n"
            "Try rotating your sequence a few times\n");

    if (circ)
        min_en = circalifold((const char **)AS, structure);
    else
        min_en = alifold(AS, structure);

    /* average energy of the consensus structure over all sequences */
    {
        int i;
        double s=0;
        extern int eos_debug;
        eos_debug=-1; /* shut off warnings about nonstandard pairs */
        for (i=0; AS[i]!=NULL; i++)
            if (circ)
                s += energy_of_circ_struct(AS[i], structure);
            else
                s += energy_of_struct(AS[i], structure);
        real_en = s/i;
    }

    string = (mis) ?
        consens_mis((const char **) AS) : consensus((const char **) AS);
    ajFmtPrintF(outf,"%s\n%s", string, structure);

    ajFmtPrintF(outf," (%6.2f = %6.2f + %6.2f) \n", min_en, real_en,
                min_en-real_en );

    if (length<=2500) {
        char **A;
        A = annote(structure, (const char**) AS);
        if (doColor)
            (void) PS_rna_plot_a(string, structure, essfile, A[0], A[1]);
        else
            (void) PS_rna_plot_a(string, structure, essfile, NULL, A[1]);
        free(A[0]); free(A[1]);free(A);
    } else
        ajWarn("INFO: structure too long, not doing xy_plot\n");

    if (doAlnPS)
        PS_color_aln(structure, alifile, AS,  names);

    { /* free mfe arrays but preserve base_pair for PS_dot_plot */
        struct bond  *bp;
        bp = base_pair; base_pair = space(16);
        free_alifold_arrays();  /* free's base_pair */
        free_alipf_arrays();
        base_pair = bp;
    }

    if (pf) {
        double energy, kT;
        pair_info *pi;
        char * mfe_struc;

        /* keep the mfe structure; 'structure' is overwritten below */
        mfe_struc = strdup(structure);

        kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
        pf_scale = exp(-(sfact*min_en)/kT/length);
        if (length>2000)
            ajWarn("scaling factor %f\n", pf_scale);

        /* init_alipf_fold(length); */

        if (confile)
        {
            /* bounded copy that always leaves 'structure' terminated */
            strncpy(structure, ajStrGetPtr(constring), (size_t) length);
            structure[length] = '\0';
        }
        energy = (circ) ? alipf_circ_fold(AS, structure, &pi) : alipf_fold(AS, structure, &pi);

        if (do_backtrack) {
            ajFmtPrintF(outf,"%s", structure);
            ajFmtPrintF(outf," [%6.2f]\n", energy);
        }
        if ((istty)||(!do_backtrack))
            ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy);
        ajFmtPrintF(outf," frequency of mfe structure in ensemble %g\n",
                    exp((energy-min_en)/kT));

        if (do_backtrack) {
            FILE *aliout;
            cpair *cp;
            short *ptable; int k;
            ptable = make_pair_table(mfe_struc);
            ajFmtPrintF(outf,"\n# Alignment section\n\n");
            aliout = ajFileGetFileptr(outf);

            fprintf(aliout, "%d sequences; length of alignment %d\n",
                    n_seq, length);
            fprintf(aliout, "alifold output\n");
            /* flag the pairs that are also part of the mfe structure */
            for (k=0; pi[k].i>0; k++) {
                pi[k].comp = (ptable[pi[k].i] == pi[k].j) ? 1:0;
                print_pi(pi[k], aliout);
            }
            fprintf(aliout, "%s\n", structure);
            free(ptable);

            cp = make_color_pinfo(pi);
            (void) PS_color_dot_plot(string, cp, dotfile);
            free(cp);
            free(mfe_struc);
            free(pi);
        }
    }



    free(base_pair);
    (void) fflush(stdout);
    free(string);
    free(structure);
    for (i=0; AS[i]; i++) {
        free(AS[i]); free(names[i]);
    }

    ajSeqsetDel(&seq);
    ajStrDel(&constring);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);
    ajStrDel(&ensbases);

    ajFileClose(&confile);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    ajFileClose(&essfile);
    ajFileClose(&alifile);
    ajFileClose(&dotfile);

    embExit();

    return 0;
}
コード例 #12
0
/* @prog supermatcher *********************************************************
**
** For every query sequence read from "asequence", searches all sequences
** of the "bsequence" set for word matches. Where a target has matches,
** the first maximum-scoring word match seeds a banded Smith-Waterman
** alignment of the two sequences; alignments scoring above "minscore" are
** written to the alignment output. Query/target pairs without any word
** match are reported in the error file.
**
** @param [r] argc [int] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] Always 0
******************************************************************************/
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;
    AjPStr targetaln = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    const char   *queryseqc;
    const char   *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend   = 0;
    ajint targetend   = 0;
    ajint width  = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    /* oldmax: current capacity of path/compass; newmax: size needed now */
    ajint oldmax = 0;
    ajint newmax = 0;

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* width for banded Smith-Waterman */
    minscore  = ajAcdGetFloat("minscore");

    /* NOTE(review): the 8 is presumably the rounding granularity - confirm */
    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    /* one match-list slot per target sequence */
    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    /* the same table is passed for every target, so it collects them all */
    for(k=0;k<ntargetseqs;k++)
    {
	targetseq = ajSeqsetGetseqSeq(targetseqs, k);
	embWordGetTable(&kmers, targetseq);
	ajDebug("Number of distinct kmers found so far: %d\n",
		ajTableGetLength(kmers));
    }
    AJCNEW0(lastlocation, ntargetseqs);

    /*
    ** NOTE(review): nothing here stops execution after the message, so
    ** unless ajErr itself terminates the program the search below runs
    ** with an empty word table - confirm that this is intended.
    */
    if(ajTableGetLength(kmers)<1)
	ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
	ajSeqTrim(queryseq);

	queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

	ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

	/* fresh match list and reset cursor for every target, per query */
	for(k=0;k<ntargetseqs;k++)
	{
	    lastlocation[k]=0;
	    matchlist[k] = ajListstrNew();
	}

	embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
		(const EmbPWordRK*)wordsw, wordlen, nkmers,
		matchlist, lastlocation, ajFalse);


	/* align the query against each target that has a word match */
	for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
	{
	    targetseq      = ajSeqsetGetseqSeq(targetseqs, k);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

	    /* no seed for this pair: log it, release the list, move on */
	    if(ajListGetLength(matchlist[k])==0)
	    {
		ajFmtPrintF(errorf,
		            "No wordmatch start points for "
		            "%s vs %s. No alignment\n",
		            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
		embWordMatchListDelete(&matchlist[k]);
		continue;
	    }


	    /* only the maximum match is used as seed
	     * (if there is more than one location with the maximum match
	     * only the first one is used)
	     * TODO: we should add a new option to make above limit optional
	     */
	    maxmatch = embWordMatchFirstMax(matchlist[k]);

	    /* fills targetstart/querystart and targetend/queryend */
	    supermatcher_findendpoints(maxmatch,targetseq, queryseq,
		    &targetstart, &querystart,
		    &targetend, &queryend);

	    targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
	    queryseqc = ajSeqGetSeqC(queryseq);
	    targetseqc = ajSeqGetSeqC(targetseq);

	    ajStrAssignC(&queryaln,"");
	    ajStrAssignC(&targetaln,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
		    targetstart, querystart, targetend, queryend);

	    /* work-array size: (target span + 2) rows of 'width' cells */
	    newmax = (targetend-targetstart+2)*width;

	    /*
	    ** Grow path/compass only when this pair needs more room than
	    ** any earlier one; otherwise reuse them via AJCSET0 on the
	    ** first newmax entries.
	    */
	    if(newmax > oldmax)
	    {
		AJCRESIZE0(path,oldmax,newmax);
		AJCRESIZE0(compass,oldmax,newmax);
		oldmax=newmax;
		ajDebug("++ memory re/allocation for path/compass arrays"
			" to size: %d\n", newmax);
	    }
	    else
	    {
		AJCSET0(path,newmax);
		AJCSET0(compass,newmax);
	    }

	    ajDebug("Calling embAlignPathCalcSWFast "
		    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
		    querystart, queryend, (queryend - querystart + 1),
		    ajSeqGetLen(queryseq),
		    targetstart, targetend, (targetend - targetstart + 1),
		    ajSeqGetLen(targetseq),
		    width);

	    /* target is passed first, query second, here and in the walk */
	    score = embAlignPathCalcSWFast(&targetseqc[targetstart],
	                                   &queryseqc[querystart],
	                                   targetend-targetstart+1,
	                                   queryend-querystart+1,
	                                   0,width,
	                                   gapopen,gapextend,
	                                   path,sub,cvt,
	                                   compass,show);
	    /* only scores strictly above the threshold are reported */
	    if(score>minscore)
	    {
		/* targetstart/querystart are passed by address here and */
		/* their values are used by the report call below */
		embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
		                         targetseq,queryseq,
		                         &targetaln,&queryaln,
		                         targetend-targetstart+1,
		                         queryend-querystart+1,
		                         0,width,
		                         &targetstart,&querystart);

		/* formats without sequence text get names and score only */
		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
		                    ajStrGetPtr(queryaln),
		                    ajSeqGetNameC(targetseq),
		                    ajSeqGetNameC(queryseq));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    ajDebug(" queryaln:%S \ntargetaln:%S\n",
		            queryaln,targetaln);
		    embAlignReportLocal(align,
			    queryseq, targetseq,
			    queryaln, targetaln,
			    querystart, targetstart,
			    gapopen, gapextend,
			    score, matrix,
			    1 + ajSeqGetOffset(queryseq),
			    1 + ajSeqGetOffset(targetseq)
		    );
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    }
	    ajStrDel(&targetaln);

	    embWordMatchListDelete(&matchlist[k]);
	}

	ajStrDel(&queryaln);
    }


    /* release every word record together with the arrays it holds */
    for(k=0;k<nkmers;k++)
    {
	AJFREE(wordsw[k]->seqindxs);
	AJFREE(wordsw[k]->nSeqMatches);

	for(j=0;j<wordsw[k]->nseqs;j++)
	    AJFREE(wordsw[k]->locs[j]);

	AJFREE(wordsw[k]->nnseqlocs);
	AJFREE(wordsw[k]->locs);
	AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    /*
    ** NOTE(review): the matrix is deleted only for formats that do not
    ** show sequences; presumably the align object still refers to it in
    ** the other case - confirm.
    */
    if(!ajAlignFormatShowsSequences(align))
	ajMatrixfDel(&matrix);
    
    AJFREE(path);
    AJFREE(compass);
    /*
    ** NOTE(review): kmers was already handed to embWordFreeTable above;
    ** this AJFREE is only harmless if that call reset the pointer to
    ** NULL - confirm, or drop this line.
    */
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
コード例 #13
0
/* @func emboss_copy **********************************************************
**
** Copies an EMBOSS sequence set into plain C strings plus the AINFO/SQINFO
** records of an alignment.
**
** The set is passed through ajSeqsetFill() and ajSeqsetFmtUpper() before it
** is copied, so the caller's seqset is modified.
**
** Per sequence the following SQINFO fields are filled in: name and id (both
** from the sequence name), acc, type and len (count of non-gap characters),
** together with the matching SQINFO_* flag bits. Alignment-wide fields
** cs, rf, name, acc and desc are taken from the first sequence of the set;
** info->au is left NULL and info->flags is left 0.
**
** @param [u] seqset  [AjPSeqset] Sequence set to copy
** @param [w] retseqs [char***]   Receives the newly allocated array of
**                                sequence strings (one per sequence)
** @param [w] info    [AINFO*]    Alignment information to fill in
** @return [void]
******************************************************************************/
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    ajint i;
    ajint cnt;
    char **seqs;
    const AjPSeq seq = NULL;
    const AjPStr label = NULL;
    const char *p = NULL;
    char c = '\0';

    info->name  = NULL;
    info->rf    = NULL;
    info->cs    = NULL;
    info->desc  = NULL;
    info->acc   = NULL;
    info->au    = NULL;
    info->flags = 0;

    ajSeqsetFill(seqset);

    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);

    maxlen = ajSeqsetGetLen(seqset);


    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    /* NOTE(review): the calloc result is not checked, as in the original */
    info->sqinfo = (SQINFO *) calloc(n, sizeof(SQINFO));
    AJCNEW0(info->wgt,n);

    /* Explicit defaults for every per-sequence record, plus the weights */
    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa = NULL;

        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }
    info->nseq = n;
    info->alen = maxlen;

    /*
    ** Names and accessions. At most SQINFO_NAMELEN-1 characters are copied
    ** so that the terminating NUL always fits inside the fixed-size field.
    */
    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);

        label = ajSeqGetNameS(seq);
        len = ajStrGetLen(label);
        if(len)
        {
            if(len >= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;

            memcpy(info->sqinfo[i].id, ajStrGetPtr(label), (size_t) len);
            info->sqinfo[i].id[len] = '\0';
            info->sqinfo[i].flags |= SQINFO_ID;

            memcpy(info->sqinfo[i].name, ajStrGetPtr(label), (size_t) len);
            info->sqinfo[i].name[len] = '\0';
            info->sqinfo[i].flags |= SQINFO_NAME;
        }

        label = ajSeqGetAccS(seq);
        len = ajStrGetLen(label);
        if(len)
        {
            if(len >= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;

            memcpy(info->sqinfo[i].acc, ajStrGetPtr(label), (size_t) len);
            info->sqinfo[i].acc[len] = '\0';
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }

    /*
    ** Alignment-wide annotation is taken from the first sequence.
    ** NOTE(review): this assumes the set holds at least one sequence;
    ** behaviour for an empty set is not guarded here - confirm with callers.
    */
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));


    /* Sequence type and ungapped length of every sequence */
    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);

        /* Count everything that is not one of the gap characters */
        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }


    *retseqs = seqs;

    return;
}
コード例 #14
0
/* @prog wordmatch ************************************************************
**
** Builds a word table from every sequence of the "asequence" set, then scans
** each sequence of the "bsequence" stream for those words with
** embWordRabinKarpSearch(). Matches are optionally written as alignments
** ("dumpalign") and/or feature tables ("dumpfeat"); when both are switched
** off the search runs in check mode and only counts matches. Summary
** statistics are written to the log file.
**
** @param [r] argc [int]    Number of command line arguments
** @param [r] argv [char**] Command line arguments
** @return [int] Always 0
******************************************************************************/
int main(int argc, char **argv)
{
    AjPSeqset seqset = NULL;
    AjPSeqall seqall = NULL;
    /*
    ** Must start as NULL: when no words are found the search loop below is
    ** skipped entirely and ajSeqDel(&queryseq) is still called at exit.
    */
    AjPSeq queryseq = NULL;
    const AjPSeq targetseq = NULL;
    ajint wordlen;
    AjPTable wordsTable = NULL;
    AjPList* matchlist = NULL;
    AjPFile logfile = NULL;
    AjPFeattable* seqsetftables = NULL;
    AjPFeattable seqallseqftable = NULL;
    AjPFeattabOut ftoutforseqsetseq = NULL;
    AjPFeattabOut ftoutforseqallseq = NULL;
    AjPAlign align = NULL;
    AjIList iter = NULL;
    ajint targetstart;
    ajint querystart;
    ajint len;
    ajuint i, j;
    ajulong nAllMatches = 0;
    ajulong sumAllScore = 0;
    AjBool dumpAlign = ajTrue;
    AjBool dumpFeature = ajTrue;
    AjBool checkmode = ajFalse;
    EmbPWordRK* wordsw = NULL;
    ajuint npatterns = 0;
    ajuint seqsetsize;
    ajuint nmatches;
    ajuint* nmatchesseqset = NULL;
    ajuint* lastlocation = NULL; /* Cursors for Rabin-Karp search. */
                                 /* Shows until what point the query sequence
                                  *  was scanned for a pattern sequences in
                                  *  the seqset.
                                 */
    char* paddedheader = NULL;
    const char* header;
    AjPStr padding = NULL;

    header = "Pattern %S  #pat-sequences  #all-matches  avg-match-length\n";
    padding = ajStrNew();

    embInit("wordmatch", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seqset  = ajAcdGetSeqset("asequence");
    seqall  = ajAcdGetSeqall("bsequence");
    logfile = ajAcdGetOutfile("logfile");
    dumpAlign = ajAcdGetToggle("dumpalign");
    dumpFeature = ajAcdGetToggle("dumpfeat");

    if(dumpAlign)
    {
        align = ajAcdGetAlign("outfile");
        ajAlignSetExternal(align, ajTrue);
    }

    seqsetsize = ajSeqsetGetSize(seqset);
    ajSeqsetTrim(seqset);
    AJCNEW0(matchlist, seqsetsize);
    AJCNEW0(seqsetftables, seqsetsize);
    AJCNEW0(nmatchesseqset, seqsetsize);

    if(dumpFeature)
    {
        ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat");
        ftoutforseqallseq = ajAcdGetFeatout("boutfeat");
    }

    /* With neither output requested, matches are only counted */
    checkmode = !dumpFeature && !dumpAlign;
    embWordLength(wordlen);

    ajFmtPrintF(logfile, "Small sequence/file for constructing"
                " target patterns: %S\n", ajSeqsetGetUsa(seqset));
    ajFmtPrintF(logfile, "Large sequence/file to be scanned"
                " for patterns: %S\n", ajSeqallGetUsa(seqall));
    ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n",
                seqsetsize);
    ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen);

    /* Accumulate the words of every pattern sequence into one table */
    for(i=0;i<seqsetsize;i++)
    {
        targetseq = ajSeqsetGetseqSeq(seqset, i);
        embWordGetTable(&wordsTable, targetseq);
    }

    AJCNEW0(lastlocation, seqsetsize);

    if(ajTableGetLength(wordsTable)>0)
    {
        npatterns = embWordRabinKarpInit(wordsTable,
                                         &wordsw, wordlen, seqset);
        ajFmtPrintF(logfile, "Number of patterns/words found: %u\n",
                    npatterns);

        while(ajSeqallNext(seqall,&queryseq))
        {
            /* Reset the per-pattern-sequence state for this query */
            for(i=0;i<seqsetsize;i++)
            {
                lastlocation[i]=0;

                if(!checkmode)
                    matchlist[i] = ajListstrNew();
            }

            nmatches = embWordRabinKarpSearch(
                    ajSeqGetSeqS(queryseq), seqset,
                    (EmbPWordRK const *)wordsw, wordlen, npatterns,
                    matchlist, lastlocation, checkmode);
            nAllMatches += nmatches;

            /* No match lists were created in check mode */
            if(checkmode)
                continue;

            for(i=0;i<seqsetsize;i++)
            {
                if(ajListGetLength(matchlist[i])>0)
                {
                    iter = ajListIterNewread(matchlist[i]);

                    while(embWordMatchIter(iter, &targetstart, &querystart,
                                           &len, &targetseq))
                    {
                        if(dumpAlign)
                        {
                            ajAlignDefineSS(align, targetseq, queryseq);
                            ajAlignSetScoreI(align, len);
                            /* ungapped alignment means same length
                             *  for both sequences
                            */
                            ajAlignSetSubRange(align, targetstart, 1, len,
                                    ajSeqIsReversed(targetseq),
                                    ajSeqGetLen(targetseq),
                                    querystart, 1, len,
                                    ajSeqIsReversed(queryseq),
                                    ajSeqGetLen(queryseq));
                        }
                    }

                    if(dumpAlign)
                    {
                        ajAlignWrite(align);
                        ajAlignReset(align);
                    }

                    if(ajListGetLength(matchlist[i])>0 && dumpFeature)
                    {
                        embWordMatchListConvToFeat(matchlist[i],
                                                   &seqsetftables[i],
                                                   &seqallseqftable,
                                                   targetseq, queryseq);
                        ajFeattableWrite(ftoutforseqallseq, seqallseqftable);
                        ajFeattableDel(&seqallseqftable);
                    }

                    ajListIterDel(&iter);
                }

                embWordMatchListDelete(&matchlist[i]);
            }
        }

        /* search completed, now report statistics */
        for(i=0;i<npatterns;i++)
        {
            sumAllScore += wordsw[i]->lenMatches;

            for(j=0;j<wordsw[i]->nseqs;j++)
                nmatchesseqset[wordsw[i]->seqindxs[j]] +=
                        wordsw[i]->nSeqMatches[j];
        }

        ajFmtPrintF(logfile, "Number of sequences in the file scanned "
                    "for patterns: %u\n", ajSeqallGetCount(seqall));
        ajFmtPrintF(logfile, "Number of all matches: %Lu"
                    " (wordmatch finds exact matches only)\n", nAllMatches);

        if(nAllMatches>0)
        {
            ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore);
            ajFmtPrintF(logfile, "Average match length: %.2f\n",
                        sumAllScore*1.0/nAllMatches);

            ajFmtPrintF(logfile, "\nDistribution of the matches among pattern"
                        " sequences:\n");
            ajFmtPrintF(logfile, "-----------------------------------------"
                        "-----------\n");

            for(i=0;i<ajSeqsetGetSize(seqset);i++)
            {
                if(nmatchesseqset[i]>0)
                    ajFmtPrintF(logfile, "%-42s: %8u\n",
                                ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)),
                                nmatchesseqset[i]);

                ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]);
                ajFeattableDel(&seqsetftables[i]);
            }

            ajFmtPrintF(logfile, "\nPattern statistics:\n");
            ajFmtPrintF(logfile, "-------------------\n");

            /* Widen the "Pattern" column when words are longer than 7 */
            if(wordlen>7)
                ajStrAppendCountK(&padding, ' ', wordlen-7);
            paddedheader = ajFmtString(header,padding);

            /* Print the prepared header as data, never as a format string */
            ajFmtPrintF(logfile, "%s", paddedheader);

            for(i=0;i<npatterns;i++)
                if(wordsw[i]->nMatches>0)
                    ajFmtPrintF(logfile, "%-7s: %12u  %12u %17.2f\n",
                                wordsw[i]->word->fword, wordsw[i]->nseqs,
                                wordsw[i]->nMatches,
                                wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches);
        }

    }

    /* Release the Rabin-Karp pattern records */
    for(i=0;i<npatterns;i++)
    {
        for(j=0;j<wordsw[i]->nseqs;j++)
            AJFREE(wordsw[i]->locs[j]);

        AJFREE(wordsw[i]->locs);
        AJFREE(wordsw[i]->seqindxs);
        AJFREE(wordsw[i]->nnseqlocs);
        AJFREE(wordsw[i]->nSeqMatches);
        AJFREE(wordsw[i]);
    }

    embWordFreeTable(&wordsTable);

    AJFREE(wordsw);
    AJFREE(matchlist);
    AJFREE(lastlocation);
    AJFREE(nmatchesseqset);
    AJFREE(seqsetftables);

    if(dumpAlign)
    {
        ajAlignClose(align);
        ajAlignDel(&align);
    }

    if(dumpFeature)
    {
        ajFeattabOutDel(&ftoutforseqsetseq);
        ajFeattabOutDel(&ftoutforseqallseq);
    }

    ajFileClose(&logfile);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&queryseq);
    ajStrDel(&padding);
    AJFREE(paddedheader);

    embExit();

    return 0;
}
コード例 #15
0
/* @prog seqnr **************************************************************
**
** Removes redundancy from DHF files (domain hits files) or other files of
** sequences.
**
** For each input file the sequences are collected into a list. Sequences
** from the optional singlets and sets filter files are appended to the same
** list with their Garbage flag set: they take part in the redundancy
** calculation but never appear in the output files. Redundancy is assessed
** with embDmxSeqNR (mode 1) or embDmxSeqNRRange (any other mode) and the
** kept hits - and optionally the rejected ones - are written out.
**
** @param [r] argc [int]    Number of command line arguments
** @param [r] argv [char**] Command line arguments
** @return [int] Always 0
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variable declarations */
    AjPList    in        = NULL;    /* Names of domain hits files (input).   */
    AjPStr     inname    = NULL;    /* Full name of the current DHF file.    */
    AjPFile    inf       = NULL;    /* Current DHF file.                     */
    EmbPHitlist infhits   = NULL;   /* Hitlist from DHF file                 */
    AjBool     dosing    = ajFalse; /* Filter using singlet sequences.       */
    AjPDir     singlets  = NULL;    /* Singlets (input).                     */
    AjBool     dosets    = ajFalse; /* Filter using sets of sequences.       */
    AjPDir     insets    = NULL;    /* Sets (input).                         */
    AjPStr     mode      = NULL;    /* Mode of operation                     */
    ajint      moden     = 0;       /* Mode 1: single threshold for redundancy
                                       removal, 2: lower and upper thresholds
                                       for redundancy removal.               */
    float      thresh    = 0.0;     /* Threshold for non-redundancy.         */
    float      threshlow = 0.0;     /* Threshold (lower limit).              */
    float      threshup  = 0.0;     /* Threshold (upper limit).              */
    AjPMatrixf matrix    = NULL;    /* Substitution matrix.                  */
    float      gapopen   = 0.0;     /* Gap insertion penalty.                */
    float      gapextend = 0.0;     /* Gap extension penalty.                */
    AjPDirout  out       = NULL;    /* Domain hits files (output).           */
    AjPFile    outf      = NULL;    /* Current DHF file (output).            */
    AjBool     dored     = ajFalse; /* True if redundant hits are output.    */
    AjPDirout  outred    = NULL;    /* DHF files for redundant hits (output).*/
    AjPFile    redf      = NULL;    /* Current DHF file redundancy (output). */
    AjPStr     outname   = NULL;    /* Name of output file (re-used).        */
    AjPFile    logf      = NULL;    /* Log file pointer.                     */

    AjBool     ok        = ajFalse; /* Housekeeping.                         */
    AjPSeqset  seqset    = NULL;    /* Seqset (re-used).                     */
    AjPSeqin   seqin     = NULL;    /* Seqin (re-used).                      */
    AjPList    seq_list  = NULL;    /* Main list for redundancy removal.     */
    EmbPDmxNrseq seq_tmp = NULL;    /* Temp. pointer for making seq_list.    */
    ajint      seq_siz   = 0;       /* Size of seq_list.                     */
    AjPUint    keep      = NULL;    /* 1: Sequence in seq_list was classed as
                                       non-redundant, 0: redundant.          */
    AjPUint    nokeep    = NULL;    /* Inversion of keep array.              */
    ajint      nseqnr    = 0;       /* No. non-redundant seqs. in seq_list.  */

    AjPStr     filtername= NULL;    /* Name of filter file (re-used).        */
    AjPFile    filterf   = NULL;    /* Current filter file.                  */
    EmbPHitlist hitlist   = NULL;   /* Hitlist from input file (re-used).    */
    AjPScopalg scopalg   = NULL;    /* Scopalg from input file.              */
    ajint      x         = 0;       /* Housekeeping.                         */


    /* Read data from acd. */
    embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION);

    in        = ajAcdGetDirlist("dhfinpath");
    dosing    = ajAcdGetToggle("dosing");
    singlets  = ajAcdGetDirectory("singletsdir");
    dosets    = ajAcdGetToggle("dosets");
    insets    = ajAcdGetDirectory("insetsdir");
    mode      = ajAcdGetListSingle("mode");
    thresh    = ajAcdGetFloat("thresh");
    threshlow = ajAcdGetFloat("threshlow");
    threshup  = ajAcdGetFloat("threshup");
    matrix    = ajAcdGetMatrixf("matrix");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    out       = ajAcdGetOutdir("dhfoutdir");
    dored     = ajAcdGetToggle("dored");
    outred    = ajAcdGetOutdir("redoutdir");
    logf      = ajAcdGetOutfile("logfile");


    /* Housekeeping. */
    filtername  = ajStrNew();
    outname     = ajStrNew();

    if(!(ajStrToInt(mode, &moden)))
        ajFatal("Could not parse ACD node option");


    /* Process each DHF (input) in turn. */
    while(ajListPop(in,(void **)&inname))
    {
        ajFmtPrint("Processing %S\n", inname);
        ajFmtPrintF(logf, "//\n%S\n", inname);

        seq_list    = ajListNew();
        keep        = ajUintNew();
        nokeep      = ajUintNew();

        /**********************************/
        /*         Open DHF file          */
        /**********************************/
        if((inf = ajFileNewInNameS(inname)) == NULL)
            ajFatal("Could not open DHF file %S", inname);

        /* Read DHF file. */
        ok = ajFalse;
        if(!(infhits = embHitlistReadFasta(inf)))
        {
            ajWarn("embHitlistReadFasta call failed in seqnr");
            ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n");

            /* Read sequence set instead. */
            seqset = ajSeqsetNew();
            seqin  = ajSeqinNew();
            ajSeqinUsa(&seqin, inname);

            if(!(ajSeqsetRead(seqset, seqin)))
                ajFatal("SeqsetRead failed in seqnr");

            if(ajSeqsetGetSize(seqset))
                ok = ajTrue;
        }
        else
            if(infhits->N)
                ok = ajTrue;

        /* Close DHF file. */
        ajFileClose(&inf);

        /* Process empty DHF files (should never occur). */
        if(!ok)
        {
            ajWarn("Empty input file %S\n", inname);
            ajFmtPrintF(logf, "Empty input file %S\n", inname);
            if(infhits)
                embHitlistDel(&infhits);
            if(seqset)
                ajSeqsetDel(&seqset);
            if(seqin)
                ajSeqinDel(&seqin);

            /* Release everything allocated for this file before skipping */
            ajListFree(&seq_list);
            ajUintDel(&keep);
            ajUintDel(&nokeep);
            ajStrDel(&inname);
            continue;
        }


        /* 1.  Create list of sequences from the main input directory.. */
        if(infhits)
        {
            for(x=0; x<infhits->N; x++)
            {
                AJNEW0(seq_tmp);
                seq_tmp->Seq = ajSeqNew();
                ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc);
                ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq);
                ajListPushAppend(seq_list,seq_tmp);
            }
        }
        else
        {
            for(x=0;x<ajSeqsetGetSize(seqset);x++)
            {
                AJNEW0(seq_tmp);
                seq_tmp->Seq = ajSeqNew();
                ajStrAssignS(&seq_tmp->Seq->Acc,
                             ajSeqsetGetseqAccS(seqset, x));
                ajStrAssignS(&seq_tmp->Seq->Seq,
                             ajSeqsetGetseqSeqS(seqset, x));
                ajListPushAppend(seq_list,seq_tmp);
            }
            ajSeqsetDel(&seqset);
            ajSeqinDel(&seqin);
        }


        /**********************************/
        /*   Open singlets filter file    */
        /**********************************/
        if(dosing)
        {
            /* Filter file: input base name, singlets path and extension. */
            ajStrAssignS(&filtername, inname);
            ajFilenameTrimPathExt(&filtername);
            ajStrInsertS(&filtername, 0, ajDirGetPath(singlets));
            ajStrAppendC(&filtername, ".");
            ajStrAppendS(&filtername, ajDirGetExt(singlets));

            if((filterf = ajFileNewInNameS(filtername)) == NULL)
            {
                ajWarn("Could not open DHF file %S",
                       filtername);
                ajFmtPrint("Could not open singlets filter file %S",
                           filtername);
            }
            else
            {
                /* Read DHF file. */
                ok = ajFalse;
                if(!(hitlist = embHitlistReadFasta(filterf)))
                {
                    ajWarn("embHitlistReadFasta call failed in seqnr");
                    ajFmtPrintF(logf,
                                "embHitlistReadFasta call failed in seqnr\n");

                    /* Read the filter file as a sequence set instead.
                       The USA must be the filter file, not the main input,
                       otherwise the input sequences are added a second time
                       as filter sequences. */
                    seqset = ajSeqsetNew();
                    seqin  = ajSeqinNew();
                    ajSeqinUsa(&seqin, filtername);

                    if(!(ajSeqsetRead(seqset, seqin)))
                        ajFatal("SeqsetRead failed in seqnr");

                    if(ajSeqsetGetSize(seqset))
                        ok = ajTrue;
                }
                else
                    if(hitlist->N)
                        ok = ajTrue;

                /* Close DHF file. */
                ajFileClose(&filterf);

                /* Process empty DHF files (should never occur). */
                if(!ok)
                {
                    ajWarn("Empty singlets filter file %S\n", filtername);
                    ajFmtPrintF(logf, "Empty singlets filter file %S\n",
                                filtername);
                    /* No continue this time. */
                }

                /* 2. Add sequences from filter directories to List but mark
                   them up (they are considered in the redundancy calculation
                   but never appear in the output files). */
                if(hitlist)
                {
                    for(x=0; x<hitlist->N; x++)
                    {
                        AJNEW0(seq_tmp);
                        seq_tmp->Seq = ajSeqNew();
                        seq_tmp->Garbage = ajTrue;
                        ajStrAssignS(&seq_tmp->Seq->Acc,
                                     hitlist->hits[x]->Acc);
                        ajStrAssignS(&seq_tmp->Seq->Seq,
                                     hitlist->hits[x]->Seq);
                        ajListPushAppend(seq_list,seq_tmp);
                    }
                    embHitlistDel(&hitlist);
                }
                else
                {
                    for(x=0;x<ajSeqsetGetSize(seqset);x++)
                    {
                        AJNEW0(seq_tmp);
                        seq_tmp->Seq = ajSeqNew();
                        seq_tmp->Garbage = ajTrue;
                        ajStrAssignS(&seq_tmp->Seq->Acc,
                                     ajSeqsetGetseqAccS(seqset, x));
                        ajStrAssignS(&seq_tmp->Seq->Seq,
                                     ajSeqsetGetseqSeqS(seqset, x));
                        ajListPushAppend(seq_list,seq_tmp);
                    }
                    ajSeqsetDel(&seqset);
                    ajSeqinDel(&seqin);
                }
            }
        }


        /**********************************/
        /*      Open sets filter file     */
        /**********************************/
        if(dosets)
        {
            /* Filter file: input base name, sets path and extension. */
            ajStrAssignS(&filtername, inname);
            ajFilenameTrimPathExt(&filtername);
            ajStrInsertS(&filtername, 0, ajDirGetPath(insets));
            ajStrAppendC(&filtername, ".");
            ajStrAppendS(&filtername, ajDirGetExt(insets));

            if((filterf = ajFileNewInNameS(filtername)) == NULL)
            {
                ajWarn("Could not open DAF file %S", filtername);
                ajFmtPrint("Could not open sets filter file %S", filtername);
            }
            else
            {
                /* Read DAF file. */
                ok = ajFalse;

                if(!(ajDmxScopalgRead(filterf, &scopalg)))
                {
                    ajWarn("ajDmxScopalgRead call failed in seqnr");
                    ajFmtPrintF(logf,
                                "ajDmxScopalgRead call failed in seqnr\n");

                    /* Read the filter file as a sequence set instead
                       (the filter file, not the main input). */
                    seqset = ajSeqsetNew();
                    seqin  = ajSeqinNew();
                    ajSeqinUsa(&seqin, filtername);

                    if(!(ajSeqsetRead(seqset, seqin)))
                        ajFatal("SeqsetRead failed in seqnr");

                    if(ajSeqsetGetSize(seqset))
                        ok = ajTrue;
                }
                else
                    if(scopalg->N)
                        ok = ajTrue;

                /* Close DAF file. */
                ajFileClose(&filterf);

                /* Process empty DAF files (should never occur). */
                if(!ok)
                {
                    ajWarn("Empty sets filter file %S\n",
                           filtername);
                    ajFmtPrintF(logf, "Empty sets filter file %S\n",
                                filtername);
                    /* No continue this time. */
                }

                /* 3. Add sequences from filter directories to List but mark
                   them up (they are considered in the redundancy calculation
                   but never appear in the output files).. */
                if(scopalg)
                {
                    for(x=0; x<scopalg->N; x++)
                    {
                        AJNEW0(seq_tmp);
                        seq_tmp->Seq = ajSeqNew();
                        seq_tmp->Garbage = ajTrue;
                        ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]);
                        ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]);
                        /* Remove gap char's & whitespace. */
                        ajStrRemoveGap(&seq_tmp->Seq->Seq);
                        ajListPushAppend(seq_list,seq_tmp);
                    }
                    ajDmxScopalgDel(&scopalg);
                }
                else
                {
                    for(x=0;x<ajSeqsetGetSize(seqset);x++)
                    {
                        AJNEW0(seq_tmp);
                        seq_tmp->Seq = ajSeqNew();
                        seq_tmp->Garbage = ajTrue;
                        ajStrAssignS(&seq_tmp->Seq->Acc,
                                     ajSeqsetGetseqAccS(seqset, x));
                        ajStrAssignS(&seq_tmp->Seq->Seq,
                                     ajSeqsetGetseqSeqS(seqset, x));
                        ajListPushAppend(seq_list,seq_tmp);
                    }
                    ajSeqsetDel(&seqset);
                    ajSeqinDel(&seqin);
                }
            }
        }


        /* 4. Identify redundant domains.. */
        if(moden == 1)
        {
            if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen,
                             gapextend, thresh, ajTrue)))
                ajFatal("embDmxSeqNR failure in seqnr");
        }
        else
        {
            if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen,
                                  gapextend, threshlow, threshup, ajTrue)))
                ajFatal("embDmxSeqNRRange failure in seqnr");
        }

        /* nokeep is the element-wise inversion of keep */
        seq_siz = ajListGetLength(seq_list);
        for(x=0; x<seq_siz; x++)
            if(ajUintGet(keep, x) == 1)
                ajUintPut(&nokeep, x, 0);
            else
                ajUintPut(&nokeep, x, 1);


        /* Create output files. */
        ajStrAssignS(&outname, inname);
        ajFilenameTrimPathExt(&outname);
        outf = ajFileNewOutNameDirS(outname, out);
        if(dored)
            redf = ajFileNewOutNameDirS(outname, outred);


        /* 5. Write non-redundant domains to main output directory.
           6.  If specified, write redundant domains to output directory.
           NOTE(review): infhits is NULL here when the input was read as a
           plain sequence set - confirm what embHitlistWriteSubsetFasta does
           in that case. */
        embHitlistWriteSubsetFasta(outf, infhits, keep);
        if(dored)
            embHitlistWriteSubsetFasta(redf, infhits, nokeep);

        embHitlistDel(&infhits);
        ajFileClose(&outf);
        ajFileClose(&redf);
        ajStrDel(&inname);

        while(ajListPop(seq_list, (void **) &seq_tmp))
        {
            ajSeqDel(&seq_tmp->Seq);
            AJFREE(seq_tmp);
        }
        ajListFree(&seq_list);
        ajUintDel(&keep);
        ajUintDel(&nokeep);
    }


    /* Tidy up. */
    ajListFree(&in);
    if(singlets)
        ajDirDel(&singlets);
    if(insets)
        ajDirDel(&insets);
    ajDiroutDel(&out);
    if(outred)
        ajDiroutDel(&outred);
    ajFileClose(&logf);

    ajMatrixfDel(&matrix);

    ajStrDel(&filtername);
    ajStrDel(&outname);
    ajStrDel(&mode);


    embExit();
    return 0;
}