Ejemplo n.º 1
0
static void tranalign_AddGaps(AjPSeq newseq,
			      const AjPSeq nseq, const AjPSeq pseq,
			      ajlong npos)
{

    AjPStr newstr = NULL;
    ajuint ppos = 0;

    newstr = ajStrNew();

    for(; ppos<ajSeqGetLen(pseq); ppos++)
    	if(ajSeqGetSeqC(pseq)[ppos] == '-')
    	    ajStrAppendC(&newstr, "---");
	else
	{
    	    ajStrAppendSubS(&newstr, ajSeqGetSeqS(nseq), npos, npos+2);
    	    npos+=3;
    	}

    ajDebug("aligned seq=%S\n", newstr);
    ajSeqAssignSeqS(newseq, newstr);

    ajStrDel(&newstr);

    return;
}
Ejemplo n.º 2
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq;
    AjPFile outf;
    AjPCod codon;
    AjPStr substr;
    ajint beg;
    ajint end;
    ajint ccnt;


    embInit("cusp", argc, argv);

    seqall = ajAcdGetSeqall("sequence");
    outf   = ajAcdGetOutfile("outfile");

    ccnt   = 0;
    substr = ajStrNew();
    codon  = ajCodNewCodenum(0);
    ajCodSetNameS(codon, ajFileGetPrintnameS(outf));

    while(ajSeqallNext(seqall, &seq))
    {
	beg = ajSeqallGetseqBegin(seqall);
	end  = ajSeqallGetseqEnd(seqall);
	ajStrAssignSubS(&substr,ajSeqGetSeqS(seq),beg-1,end-1);
	ajCodSetTripletsS(codon,substr,&ccnt);
    }

    ajCodCalcUsage(codon,ccnt);

    ajCodSetDescC(codon, "CUSP codon usage file");
    ajCodWrite(codon, outf);
    ajFileClose(&outf);

    ajStrDel(&substr);
    ajCodDel(&codon);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);

    embExit();

    return 0;
}
Ejemplo n.º 3
0
int main(int argc, char **argv)
{
    AjPSeqall  seqall;
    AjPSeq     seq;
    AjPFile    outf;
    AjPCod     codon;
    AjPStr     substr;
    AjBool     sum;
    
    ajint ccnt;
    ajint beg;
    ajint end;

    float Nc;
    double td;
    

    embInit("chips", argc, argv);

    seqall = ajAcdGetSeqall("seqall");
    outf   = ajAcdGetOutfile("outfile");
    sum    = ajAcdGetBoolean("sum");
    
    codon  = ajCodNewCodenum(0);

    ccnt = 0;
    substr = ajStrNew();

    while(ajSeqallNext(seqall, &seq))
    {
	beg = ajSeqallGetseqBegin(seqall);
	end = ajSeqallGetseqEnd(seqall);
	ajStrAssignSubS(&substr,ajSeqGetSeqS(seq),beg-1,end-1);
	ajStrFmtUpper(&substr);
	ajCodSetTripletsS(codon,substr,&ccnt);
	if(!sum)
	{
	    ajCodCalcUsage(codon,ccnt);
	    td = ajCodCalcNc(codon);
	    Nc = (float) td;
	    
	    ajFmtPrintF(outf,"%-20s Nc = %.3f\n",ajSeqGetNameC(seq),Nc);
	    ajCodClearData(codon);
	}
    }

    if(sum)
    {
	ajCodCalcUsage(codon,ccnt);
	td = ajCodCalcNc(codon);
	Nc = (float) td;
	
	ajFmtPrintF(outf,"# CHIPS codon usage statistics\n\n");
	ajFmtPrintF(outf,"Nc = %.3f\n",Nc);
    }
    
    ajFileClose(&outf);
    ajCodDel(&codon);
    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajStrDel(&substr);

    embExit();

    return 0;
}
Ejemplo n.º 4
0
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
			  const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy;
    ajuint hits;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf  = NULL;
    AjPSeq revseq  = NULL;
    AjPList list   = ajListNew();
    AjPStr seqstr  = ajStrNew();
    AjPStr seqname = ajStrNew();
    AjPStr tmp     = ajStrNew();
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
			begin-1,adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
			begin-1,adj-1);

    ajStrFmtUpper(&seqstr);
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
	   "'%S'\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
	   seqstr);*/

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
	    pattern->pattern, pat->Protein, reverse);
    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
	ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

 	if (reverse)
	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
	else
        {
	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }
        
	if(isreversed)
	    ajFeatReverse(sf, seqlen);

	/*
	ajUser("isrev: %B reverse: %B begin: %d adj: %d "
	       "start: %d len: %d seqlen: %d %d..%d '%c'\n",
	       isreversed, reverse, begin, adj, m->start, m->len, seqlen,
	       sf->Start, sf->End, sf->Strand);
	*/

	ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if (reverse)
        ajSeqDel(&revseq);

    return;
}
Ejemplo n.º 5
0
void getorf_FindORFs(const AjPSeq seq, ajint len, const AjPTrn trnTable,
                     ajuint minsize, ajuint maxsize, AjPSeqout seqout,
                     AjBool sense, AjBool circular, ajint find,
                     ajint *orf_no, AjBool methionine, ajint around,
                     ORFrec *record) {
  AjBool ORF[3];            /* true if found an ORF */
  AjBool LASTORF[3];         /* true if hit the end of an ORF past
                    the end on the genome in this
                    frame */
  AjBool GOTSTOP[3];         /* true if found a STOP in a circular
                    genome's frame when
                    find = P_STOP2STOP or
                    N_STOP2STOP */
  ajint start[3];          /* possible starting position of the
                     three frames */
  ajint pos;
  ajint codon;
  char aa;
  ajint frame;
  AjPStr newstr[3];         /* strings of the three frames of ORF
                    sequences that we are growing */
  AjPSeq pep = NULL;
  ajint i;

  ajint seqlen;
  const char *chrseq;

  seqlen = ajSeqGetLen(seq);
  chrseq = ajSeqGetSeqC(seq);

  /* initialise the ORF sequences */
  newstr[0] = NULL;
  newstr[1] = NULL;
  newstr[2] = NULL;

  /*
  ** initialise flags for found the last ORF past the end of a circular
  ** genome
  */
  LASTORF[0] = ajFalse;
  LASTORF[1] = ajFalse;
  LASTORF[2] = ajFalse;

  /* initialise flags for found at least one STOP codon in a frame */
  GOTSTOP[0] = ajFalse;
  GOTSTOP[1] = ajFalse;
  GOTSTOP[2] = ajFalse;

  if (circular || find == P_START2STOP || find == N_START2STOP ||
      find == AROUND_START) {
    ORF[0] = ajFalse;
    ORF[1] = ajFalse;
    ORF[2] = ajFalse;
  } else {
    /*
    ** assume already in a ORF so we get ORFs at the start of the
    ** sequence
    */
    ORF[0] = ajTrue;
    ORF[1] = ajTrue;
    ORF[2] = ajTrue;
    start[0] = 0;
    start[1] = 1;
    start[2] = 2;
  }

  for (pos=0; pos<seqlen-2; pos++) {
    codon = ajTrnStartStopC(trnTable, &chrseq[pos], &aa);
    frame = pos % 3;
    ajDebug("len=%d, Pos=%d, Frame=%d start/stop=%d, aa=%c",
            len, pos, frame, codon, aa);

    /* don't want to find extra ORFs when already been round circ */
    if (LASTORF[frame])
      continue;

    if (find == P_STOP2STOP || find == N_STOP2STOP ||
        find == AROUND_INIT_STOP || find == AROUND_END_STOP) {  /* look for stop codon to begin reporting ORF */
      /* note that there was at least one STOP in a circular genome */
      if (codon == STOP) {
        GOTSTOP[frame] = ajTrue;
      }

      /* write details if a STOP is hit or the end of the sequence */
      if (codon == STOP || pos >= seqlen-5) {

        /*
        ** End of the sequence? If so, append any
        ** last codon to the sequence - otherwise, ignore the STOP
        ** codon
        */
        if (codon != STOP)
          getorf_AppORF(find, &newstr[frame], chrseq, pos,
                        aa);

        /* Already have a sequence to write out? */
        if (ORF[frame]) {
          if (ajStrGetLen(newstr[frame]) >= minsize &&
              ajStrGetLen(newstr[frame]) <= maxsize) {
            /* create a new sequence */
            if (codon == STOP)
              getorf_WriteORF(seq, len, seqlen, sense,
                              find, orf_no, start[frame],
                              pos-1, newstr[frame],
                              seqout, around);
            else
              getorf_WriteORF(seq, len, seqlen, sense,
                              find, orf_no, start[frame],
                              pos+2, newstr[frame],
                              seqout, around);
          }

          ajStrSetClear(&newstr[frame]);
        }

        /*
        ** if its a circular genome and the STOP codon hits past
        ** the end of the genome in all frames, then break
        */
        if (circular && pos >= len) {
          ORF[frame] = ajFalse; /* past the end of the genome */
          LASTORF[frame] = ajTrue; /* finished getting ORFs */
          if (LASTORF[0] && LASTORF[1] && LASTORF[2])
            break;
        } else {
          /*
          ** hit a STOP, therefore a potential ORF to write
          ** out next time, even if the genome is circular
          */
          ORF[frame]   = ajTrue;
          start[frame] = pos+3; /* next start of the ORF */
        }

      } else if (ORF[frame])
        /* append sequence to newstr if in an ORF */
        getorf_AppORF(find, &newstr[frame], chrseq, pos, aa);
    } else { /* Look for start: P_START2STOP N_START2STOP AROUND_START */

      if (codon == START && !ORF[frame]) {
        /* not in a ORF already and found a START */
        if (pos < len) {
          /*
          **  reset the newstr to zero length to enable
          **  storing the ORF for this
          */
          ajStrSetClear(&newstr[frame]);
          ORF[frame] = ajTrue; /* now in an ORF */
          start[frame] = pos;    /* start of the ORF for this frame */
          if (methionine)
            getorf_AppORF(find, &newstr[frame], chrseq,
                          pos, 'M');
          else
            getorf_AppORF(find, &newstr[frame], chrseq,
                          pos, aa);
        }
      } else if (codon == STOP) {
        /* hit a STOP */

        /* Already have a sequence to write out? */
        if (ORF[frame]) {
          ORF[frame] = ajFalse; /* not in an ORF */

          if (ajStrGetLen(newstr[frame]) >= minsize &&
              ajStrGetLen(newstr[frame]) <= maxsize) {
            /* create a new sequence */
            getorf_WriteORF(seq, len, seqlen, sense,
                            find, orf_no, start[frame],
                            pos-1, newstr[frame],
                            seqout, around);
          }
        }

        /*
        ** if a circular genome and hit the STOP past
        ** the end of the genome in all frames, then break
        */
        if (circular && pos >= len) {
          LASTORF[frame] = ajTrue; /* finished getting ORFs */
          if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break;
        }

        ajStrSetClear(&newstr[frame]);
      } else if (pos >= seqlen-5) {
        /* hit the end of the sequence  without a stop */

        /* Already have a sequence to write out? */
        if (ORF[frame]) {
          ORF[frame] = ajFalse; /* not in an ORF */

          /*
          ** End of the sequence? If so, append any
          ** last codon to the sequence - otherwise, ignore the
          ** STOP codon
          */
          if (pos >= seqlen-5 && pos < seqlen-2)
            getorf_AppORF(find, &newstr[frame], chrseq,
                          pos, aa);

          if (ajStrGetLen(newstr[frame]) >= minsize &&
              ajStrGetLen(newstr[frame]) <= maxsize) {
            /* create a new sequence */
            getorf_WriteORF(seq, len, seqlen, sense,
                            find, orf_no, start[frame],
                            pos+2, newstr[frame],
                            seqout, around);
          }
        }

        /*
        ** if a circular genome and hit the STOP past
        ** the end of the genome in all frames, then break
        */
        if (circular && pos >= len) {
          LASTORF[frame] = ajTrue; /* finished getting ORFs */
          if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break;
        }

        ajStrSetClear(&newstr[frame]);
      } else
        if (ORF[frame])
          getorf_AppORF(find, &newstr[frame], chrseq, pos,
                        aa);

    }
  }

  /*
  ** Currently miss reporting a STOP-to-STOP ORF that is
  ** the full length of a circular genome when there are no STOP codons in
  ** that frame
  */
  if ((find == P_STOP2STOP || find == N_STOP2STOP) && circular) {
    if (!GOTSTOP[0]) {
      /* translate frame 1 into pep */
      pep = ajTrnSeqOrig(trnTable, seq, 1);
      if (ajSeqGetLen(pep) >= minsize &&
          ajSeqGetLen(pep) <= maxsize)
        getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
                        0, seqlen-1, ajSeqGetSeqS(pep), seqout,
                        around);
      ajSeqDel(&pep);
    }

    if (!GOTSTOP[1]) {
      /* translate frame 2 into pep */
      pep = ajTrnSeqOrig(trnTable, seq, 2);
      if (ajSeqGetLen(pep) >= minsize &&
          ajSeqGetLen(pep) <= maxsize)
        getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
                        1, seqlen-1, ajSeqGetSeqS(pep), seqout,
                        around);
      ajSeqDel(&pep);
    }

    if (!GOTSTOP[2]) {
      /* translate frame 3 into pep */
      pep = ajTrnSeqOrig(trnTable, seq, 3);
      if (ajSeqGetLen(pep) >= minsize &&
          ajSeqGetLen(pep) >= maxsize)
        getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
                        2, seqlen-1, ajSeqGetSeqS(pep), seqout,
                        around);
      ajSeqDel(&pep);
    }
  }

  for (i=0;i<3;++i)
    ajStrDel(&newstr[i]);

  return;
}
Ejemplo n.º 6
0
int main(int argc, char **argv)
{
    AjPSeqset seq1;
    AjPSeqset seq2;
    AjPFile list;
    ajint n1;
    ajint n2;
    ajint *lengths1;
    ajint *lengths2;
    ajuint *order1;
    ajuint *order2;
    ajint *hits1;
    ajint *hits2;
    ajint curr1;
    ajint curr2;
    ajint tmp1;
    ajint tmp2 = 0;
    ajint i;
    AjPStr operator;
    ajint OperatorCode=0;


    embInit("listor", argc, argv);

    seq1     = ajAcdGetSeqset("firstsequences");
    seq2     = ajAcdGetSeqset("secondsequences");
    list     = ajAcdGetOutfile("outfile");
    operator = ajAcdGetListSingle("operator");

    /* get the operator value */
    switch(ajStrGetCharFirst(operator))
    {
    case 'O':
	OperatorCode = L_OR;
	break;
    case 'A':
	OperatorCode = L_AND;
	break;
    case 'X':
	OperatorCode = L_XOR;
	break;
    case 'N':
	OperatorCode = L_NOT;
	break;
    default:
	ajFatal("Invalid operator type: %S", operator);
	embExitBad();
    }


    /* get the order of seqset 1 by length */
    n1 = ajSeqsetGetSize(seq1);

    /* lengths of seq1 entries */
    lengths1 = AJCALLOC0(n1, sizeof(ajint));

    /* seq1 entries which match seq2 */
    hits1    = AJCALLOC0(n1, sizeof(ajint));

    /* seq1 entries in length order */
    order1   = AJCALLOC0(n1, sizeof(ajint));
    for(i=0; i<n1; i++)
    {
	lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i));
	order1[i]   = i;
	hits1[i]    = -1;
    }
    ajSortIntIncI(lengths1, order1, n1);

    /* get the order of seqset 2 by length */
    n2 = ajSeqsetGetSize(seq2);
    lengths2 = AJCALLOC0(n2, sizeof(ajint));
    hits2    = AJCALLOC0(n2, sizeof(ajint));
    order2   = AJCALLOC0(n2, sizeof(ajint));

    for(i=0; i<n2; i++)
    {
	lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i));
	order2[i]   = i;
	hits2[i]    = -1;
    }
    ajSortIntIncI(lengths2, order2, n2);

    /*
    ** go down the two sequence sets, by size order, looking for identical
    **lengths
    */
    curr1 = 0;
    curr2 = 0;
    while(curr1 < n1 &&  curr2 < n2)
    {
	if(lengths1[order1[curr1]] < lengths2[order2[curr2]])
	    /* seq1 is shorter - increment curr1 index */
	    curr1++;
	else if(lengths1[order1[curr1]] > lengths2[order2[curr2]])
	    /* seq2 is shorter - increment curr2 index */
	    curr2++;
	else
	{
	    /* identical lengths - check all seq1/seq2 entries of this len */
	    for(tmp1=curr1; tmp1<n1
		 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++)
		for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] ==
		    lengths2[order2[curr2]]; tmp2++)
		    /* check to see if the sequences are identical */
		    if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1,
							     order1[tmp1])),
				      ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2,
				      order2[tmp2]))))
		    {
			hits1[order1[tmp1]] = order2[tmp2];
			hits2[order2[tmp2]] = order1[tmp1];
		    }

	    curr1 = tmp1;
	    curr2 = tmp2;
	}
    }

    /* output the required entries to the list file */
    listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2);


    AJFREE(lengths1);
    AJFREE(lengths2);
    AJFREE(order1);
    AJFREE(order2);
    AJFREE(hits1);
    AJFREE(hits2);
    ajFileClose(&list);
    ajStrDel(&operator);

    ajSeqsetDel(&seq1);
    ajSeqsetDel(&seq2);

    embExit();

    return 0;
}
Ejemplo n.º 7
0
int main(int argc, char **argv)
{
    AjPList list = NULL;
    AjPSeq seq;
    AjPSeq seq2;
    AjPStr aa0str = 0;
    AjPStr aa1str = 0;
    const char *s1;
    const char *s2;
    char *strret = NULL;
    ajuint i;
    ajuint j;
    ajuint k;
    ajint l;
    ajint abovethresh;
    ajint total;
    ajint starti = 0;
    ajint startj = 0;
    ajint windowsize;
    float thresh;
    AjPGraph graph   = NULL;
    AjPGraph xygraph = NULL;
    float flen1;
    float flen2;
    ajuint len1;
    ajuint len2;

    AjPTime ajtime = NULL;
    time_t tim;
    AjBool boxit=AJTRUE;
    /* Different ticks as they need to be different for x and y due to
       length of string being important on x */
    ajuint acceptableticksx[]=
    {
	1,10,50,100,500,1000,1500,10000,
	500000,1000000,5000000
    };
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,2000,5000,10000,15000,
	500000,1000000,5000000
    };
    ajint numbofticks = 10;
    float xmargin;
    float ymargin;
    float ticklen;
    float tickgap;
    float onefifth;
    float k2;
    float max;
    char ptr[10];
    AjPMatrix matrix = NULL;
    ajint** sub;
    AjPSeqCvt cvt;
    AjPStr  subt = NULL;

    ajint b1;
    ajint b2;
    ajint e1;
    ajint e2;
    AjPStr se1;
    AjPStr se2;
    ajint ithresh;
    AjBool stretch;
    PPoint ppt = NULL;
    float xa[1];
    float ya[1];
    AjPGraphdata gdata=NULL;
    AjPStr tit   = NULL;
    AjIList iter = NULL;
    float x1 = 0.;
    float x2 = 0.;
    float y1 = 0.;
    float y2 = 0.;
    ajuint tui;
    
    se1 = ajStrNew();
    se2 = ajStrNew();

    embInit("dotmatcher", argc, argv);
    
    seq        = ajAcdGetSeq("asequence");
    seq2       = ajAcdGetSeq("bsequence");
    stretch    = ajAcdGetToggle("stretch");
    graph      = ajAcdGetGraph("graph");
    xygraph    = ajAcdGetGraphxy("xygraph");
    windowsize = ajAcdGetInt("windowsize");
    ithresh    = ajAcdGetInt("threshold");
    matrix     = ajAcdGetMatrix("matrixfile");
    
    sub = ajMatrixGetMatrix(matrix);
    cvt = ajMatrixGetCvt(matrix);
    
    thresh = (float)ithresh;

    ajtime = ajTimeNew();

    tim = time(0);
    ajTimeSetLocal(ajtime, tim);
    
    b1 = ajSeqGetBegin(seq);
    b2 = ajSeqGetBegin(seq2);
    e1 = ajSeqGetEnd(seq);
    e2 = ajSeqGetEnd(seq2);
    len1 = ajSeqGetLen(seq);
    len2 = ajSeqGetLen(seq2);
    tui   = ajSeqGetLen(seq);
    flen1 = (float) tui;
    tui   = ajSeqGetLen(seq2);
    flen2 = (float) tui;
    
    ajStrAssignSubC(&se1,ajSeqGetSeqC(seq),b1-1,e1-1);
    ajStrAssignSubC(&se2,ajSeqGetSeqC(seq2),b2-1,e2-1);
    ajSeqAssignSeqS(seq,se1);
    ajSeqAssignSeqS(seq2,se2);
    
    
    s1 = ajStrGetPtr(ajSeqGetSeqS(seq));
    s2 = ajStrGetPtr(ajSeqGetSeqS(seq2));
    
    
    aa0str = ajStrNewRes(1+len1); /* length plus trailing blank */
    aa1str = ajStrNewRes(1+len2);
    
    list = ajListNew();
    
    
    for(i=0;i<len1;i++)
	ajStrAppendK(&aa0str,(char)ajSeqcvtGetCodeK(cvt, *s1++));
    
    for(i=0;i<len2;i++)
	ajStrAppendK(&aa1str,(char)ajSeqcvtGetCodeK(cvt, *s2++));
    
    max = (float)len1;
    if(len2 > max)
	max = (float) len2;
    
    xmargin = ymargin = max *(float)0.15;
    ticklen = xmargin*(float)0.1;
    onefifth  = xmargin*(float)0.2;
    
    subt = ajStrNewC((strret=
		      ajFmtString("(windowsize = %d, threshold = %3.2f  %D)",
				  windowsize,thresh,ajtime)));
    
    

    if(!stretch)
    {
	if( ajStrGetLen(ajGraphGetSubtitleS(graph)) <=1)
	    ajGraphSetSubtitleS(graph,subt);

	ajGraphOpenWin(graph, (float)0.0-ymargin,(max*(float)1.35)+ymargin,
		       (float)0.0-xmargin,(float)max+xmargin);

	ajGraphicsDrawposTextAtmid(flen1*(float)0.5,
                                   (float)0.0-(xmargin/(float)2.0),
		       ajGraphGetXlabelC(graph));
	ajGraphicsDrawposTextAtlineJustify((float)0.0-(xmargin*(float)0.75),
                                           flen2*(float)0.5,
			(float)0.0-(xmargin*(float)0.75),flen1,
			ajGraphGetYlabelC(graph),0.5);

	ajGraphicsSetCharscale(0.5);
    }
    
    
    
    s1= ajStrGetPtr(aa0str);
    s2 = ajStrGetPtr(aa1str);
    
    for(j=0; (ajint)j < (ajint)len2-windowsize;j++)
    {
	i =0;
	total = 0;
	abovethresh =0;

	k = j;
	for(l=0;l<windowsize;l++)
	    total = total + sub[(ajint)s1[i++]][(ajint)s2[k++]];

	if(total >= thresh)
	{
	    abovethresh=1;
	    starti = i-windowsize;
	    startj = k-windowsize;
	}

	while(i < len1 && k < len2)
	{
	    total = total - sub[(ajint)s1[i-windowsize]]
		[(ajint)s2[k-windowsize]];
	    total = total + sub[(ajint)s1[i]][(ajint)s2[k]];

	    if(abovethresh)
	    {
		if(total < thresh)
		{
		    abovethresh = 0;
		    /* draw the line */
		    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
					 (float)i-1,(float)k-1,stretch);
		}
	    }
	    else if(total >= thresh)
	    {
		starti = i-windowsize;
		startj = k-windowsize;
		abovethresh= 1;
	    }
	    i++;
	    k++;
	}

	if(abovethresh)
	    /* draw the line */
	    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
				 (float)i-1,(float)k-1,
				 stretch);
    }
    
    for(i=0; (ajint)i < (ajint)len1-windowsize;i++)
    {
	j = 0;
	total = 0;
	abovethresh =0;

	k = i;
	for(l=0;l<windowsize;l++)
	    total = total + sub[(ajint)s1[k++]][(ajint)s2[j++]];

	if(total >= thresh)
	{
	    abovethresh=1;
	    starti = k-windowsize;
	    startj = j-windowsize;
	}

	while(k < len1 && j < len2)
	{
	    total = total - sub[(ajint)s1[k-windowsize]]
		[(ajint)s2[j-windowsize]];
	    total = total + sub[(ajint)s1[k]][(ajint)s2[j]];

	    if(abovethresh)
	    {
		if(total < thresh)
		{
		    abovethresh = 0;
		    /* draw the line */
		    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
					 (float)k-1,(float)j-1,stretch);
		}
	    }
	    else if(total >= thresh)
	    {
		starti = k-windowsize;
		startj = j-windowsize;
		abovethresh= 1;
	    }
	    j++;
	    k++;
	}

	if(abovethresh)
	    /* draw the line */
	    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
				 (float)k-1,(float)j-1,
				 stretch);
    }
    
    if(boxit && !stretch)
    {
	ajGraphicsDrawposRect(0.0,0.0,flen1, flen2);

	i=0;
	while(acceptableticksx[i]*numbofticks < len1)
	    i++;

	if(i<=13)
	    tickgap = (float)acceptableticksx[i];
	else
	    tickgap = (float)acceptableticksx[10];
	ticklen   = xmargin*(float)0.1;
	onefifth  = xmargin*(float)0.2;

	if(len2/len1 > 10 )
	{
	    /* if a lot smaller then just label start and end */
	    ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0,(float)0.0-ticklen);
	    sprintf(ptr,"%d",b1-1);
	    ajGraphicsDrawposTextAtmid((float)0.0,(float)0.0-(onefifth),ptr);

	    ajGraphicsDrawposLine(flen1,(float)0.0,
			flen1,(float)0.0-ticklen);
	    sprintf(ptr,"%d",len1+b1-1);
	    ajGraphicsDrawposTextAtmid(flen1,(float)0.0-(onefifth),ptr);

	}
	else
	    for(k2=0.0;k2<len1;k2+=tickgap)
	    {
		ajGraphicsDrawposLine(k2,(float)0.0,k2,(float)0.0-ticklen);
		sprintf(ptr,"%d",(ajint)k2+b1-1);
		ajGraphicsDrawposTextAtmid(k2,(float)0.0-(onefifth),ptr);
	    }

	i = 0;
	while(acceptableticks[i]*numbofticks < len2)
	    i++;

	tickgap   = (float)acceptableticks[i];
	ticklen   = ymargin*(float)0.01;
	onefifth  = ymargin*(float)0.02;

	if(len1/len2 > 10 )
	{
	    /* if a lot smaller then just label start and end */
	    ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0-ticklen,(float)0.0);
	    sprintf(ptr,"%d",b2-1);
	    ajGraphicsDrawposTextAtend((float)0.0-(onefifth),(float)0.0,ptr);

	    ajGraphicsDrawposLine((float)0.0,flen2,(float)0.0-ticklen,
			flen2);
	    sprintf(ptr,"%d",len2+b2-1);
	    ajGraphicsDrawposTextAtend((float)0.0-(onefifth),flen2,ptr);
	}
	else
	    for(k2=0.0;k2<len2;k2+=tickgap)
	    {
		ajGraphicsDrawposLine((float)0.0,k2,(float)0.0-ticklen,k2);
		sprintf(ptr,"%d",(ajint)k2+b2-1);
		ajGraphicsDrawposTextAtend((float)0.0-(onefifth),k2,ptr);
	    }
    }
    
    
    if(!stretch)
	ajGraphicsClose();
    else			/* the xy graph for -stretch */
    {
	tit = ajStrNew();
	ajFmtPrintS(&tit,"%S",ajGraphGetTitleS(xygraph));


	gdata = ajGraphdataNewI(1);
	xa[0] = (float)b1;
	ya[0] = (float)b2;

	ajGraphSetTitleC(xygraph,ajStrGetPtr(tit));

	ajGraphSetXlabelC(xygraph,ajSeqGetNameC(seq));
	ajGraphSetYlabelC(xygraph,ajSeqGetNameC(seq2));

	ajGraphdataSetTypeC(gdata,"2D Plot Float");
	ajGraphdataSetTitleS(gdata,subt);
	ajGraphdataSetMinmax(gdata,(float)b1,(float)e1,(float)b2,
			       (float)e2);
	ajGraphdataSetTruescale(gdata,(float)b1,(float)e1,(float)b2,
			       (float)e2);
	ajGraphxySetXstartF(xygraph,(float)b1);
	ajGraphxySetXendF(xygraph,(float)e1);
	ajGraphxySetYstartF(xygraph,(float)b2);
	ajGraphxySetYendF(xygraph,(float)e2);

	ajGraphxySetXrangeII(xygraph,b1,e1);
	ajGraphxySetYrangeII(xygraph,b2,e2);


	if(list)
	{
	    iter = ajListIterNewread(list);
	    while((ppt = ajListIterGet(iter)))
	    {
		x1 = ppt->x1+b1-1;
		y1 = ppt->y1+b2-1;
		x2 = ppt->x2+b1-1;
		y2 = ppt->y2+b2-1;
		ajGraphAddLine(xygraph,x1,y1,x2,y2,0);
		AJFREE(ppt);
	    }
	    ajListIterDel(&iter);
	}

	ajGraphdataAddXY(gdata,xa,ya);
	ajGraphDataReplace(xygraph,gdata);


	ajGraphxyDisplay(xygraph,ajFalse);
	ajGraphicsClose();

	ajStrDel(&tit);
    }
    
    
    
    ajListFree(&list);

    ajSeqDel(&seq);
    ajSeqDel(&seq2);
    ajGraphxyDel(&graph);
    ajGraphxyDel(&xygraph);
    ajMatrixDel(&matrix);
    ajTimeDel(&ajtime);
    
    /* deallocate memory */
    ajStrDel(&aa0str);
    ajStrDel(&aa1str);
    ajStrDel(&se1);
    ajStrDel(&se2);
    ajStrDel(&subt);

    AJFREE(strret);			/* created withing ajFmtString */
    
    embExit();

    return 0;
}
Ejemplo n.º 8
0
int main(int argc, char *argv[])
{
  embInitPV("ggcsi", argc, argv, "GEMBASSY", "1.0.1");

  struct soap soap;
  struct ns1__gcsiInputParams params;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    inseq   = NULL;
  AjPStr    seqid   = NULL;
  ajint	    window  = 0;
  AjBool    at      = 0;
  AjBool    purine  = 0;
  AjBool    keto    = 0;
  AjBool    pval    = 0;
  AjPStr    version = NULL;
  AjBool    accid   = ajFalse;
  AjPStr    tmp     = NULL;
  AjPStr    parse   = NULL;
  AjPStr    gcsi    = NULL;
  AjPStr    sa      = NULL;
  AjPStr    dist    = NULL;
  AjPStr    z       = NULL;
  AjPStr    p       = NULL;
  AjPStrTok handle  = NULL;

  char *in0;
  char *result;

  AjPFile outf = NULL;

  seqall  = ajAcdGetSeqall("sequence");
  window  = ajAcdGetInt("window");
  at      = ajAcdGetBoolean("at");
  purine  = ajAcdGetBoolean("purine");
  keto    = ajAcdGetBoolean("keto");
  pval    = ajAcdGetBoolean("pval");
  version = ajAcdGetSelectSingle("gcsi");
  accid   = ajAcdGetBoolean("accid");
  outf    = ajAcdGetOutfile("outfile");

  params.window = window;
  params.at     = 0;
  params.purine = 0;
  params.keto   = 0;
  params.p      = 0;
  ajStrToInt(version, &(params.version));

  if(at)
    params.at = 1;
  if(purine)
    params.purine = 1;
  if(keto)
    params.keto = 1;
  if(pval)
    params.p = 1;

  while(ajSeqallNext(seqall, &seq))
    {
      soap_init(&soap);

      inseq = NULL;

      ajStrAppendC(&inseq, ">");
      ajStrAppendS(&inseq, ajSeqGetNameS(seq));
      ajStrAppendC(&inseq, "\n");
      ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      in0 = ajCharNewS(inseq);

      if (soap_call_ns1__gcsi(
	                      &soap,
                              NULL,
                              NULL,
                              in0,
                             &params,
                             &result
                            ) == SOAP_OK)
	{
	  tmp   = ajStrNew();
	  parse = ajStrNew();
	  gcsi  = ajStrNew();
	  sa    = ajStrNew();
	  dist  = ajStrNew();
	  z     = ajStrNew();
	  p     = ajStrNew();

	  ajStrAssignC(&tmp, result);

	  ajStrExchangeCC(&tmp, "<", "\n");
	  ajStrExchangeCC(&tmp, ">", "\n");

	  handle = ajStrTokenNewC(tmp, "\n");

	  while (ajStrTokenNextParse(&handle, &parse))
	    {
	      if (ajStrIsFloat(parse))
		{
		  if(!ajStrGetLen(gcsi))
		    ajStrAssignS(&gcsi, parse);
		  else if(!ajStrGetLen(sa))
		    ajStrAssignS(&sa, parse);
		  else if(!ajStrGetLen(dist))
		    ajStrAssignS(&dist, parse);
		  else if(!ajStrGetLen(z))
		    ajStrAssignS(&z, parse);
		  else if(!ajStrGetLen(p))
		    ajStrAssignS(&p, parse);
		}
	    }

	  tmp = ajFmtStr("Sequence: %S GCSI: %S SA: %S DIST: %S",
			 seqid, gcsi, sa, dist);

	  if(pval)
	    tmp = ajFmtStr("%S Z: %S P: %S", tmp, z, p);

          ajFmtPrintF(outf, "%S\n", tmp);

	  ajStrDel(&tmp);
	  ajStrDel(&parse);
	  ajStrDel(&gcsi);
	  ajStrDel(&sa);
	  ajStrDel(&dist);
	  ajStrDel(&z);
	  ajStrDel(&p);
	}
      else
	{
	  soap_print_fault(&soap, stderr);
	}

      soap_destroy(&soap);
      soap_end(&soap);
      soap_done(&soap);

      AJFREE(in0);

      ajStrDel(&inseq);
    }

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&seqid);

  embExit();

  return 0;
}
Ejemplo n.º 9
0
static void restover_printHits(const AjPSeq seq, const AjPStr seqcmp,
			       AjPFile outf,
			       AjPList l, const AjPStr name, ajint hits,
			       ajint begin, ajint end,
			       ajint mincut, ajint maxcut, AjBool plasmid,
			       ajint sitelen,
			       AjBool limit, const AjPTable table,
			       AjBool alpha, AjBool frags,
			       AjBool html)
{
    EmbPMatMatch m = NULL;
    AjPStr ps = NULL;
    ajint *fa = NULL;
    ajint *fx = NULL;
    ajint fc = 0;
    ajint fn = 0;
    ajint fb = 0;
    ajint last = 0;
    AjPStr overhead = NULL;

    const AjPStr value = NULL;

    ajint i;
    ajint c = 0;

    ajint hang1;
    ajint hang2;


    ps = ajStrNew();
    fn = 0;

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Restrict of %S from %d to %d\n",name,begin,end);

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"#\n");

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Minimum cuts per enzyme: %d\n",mincut);

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Maximum cuts per enzyme: %d\n",maxcut);

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Minimum length of recognition site: %d\n",
		sitelen);
    if(html)
	ajFmtPrintF(outf,"<BR>");

    hits = embPatRestrictRestrict(l,hits,!limit,alpha);

    if(frags)
    {
	fa = AJALLOC(hits*2*sizeof(ajint));
	fx = AJALLOC(hits*2*sizeof(ajint));
    }


    ajFmtPrintF(outf,"# Number of hits with any overlap: %d\n",hits);

    if(html)
	ajFmtPrintF(outf,"<BR>");

    if(html)
	ajFmtPrintF(outf,"</p><table  border cellpadding=4 "
		    "bgcolor=\"#FFFFF0\">\n");
    if(html)
	ajFmtPrintF(outf,
		    "<th>Base Number</th><th>Enzyme</th><th>Site</th>"
		    "<th>5'</th><th>3'</th><th>[5'</th><th>3']</th>\n");
    else
	ajFmtPrintF(outf,"# Base Number\tEnzyme\t\tSite\t\t5'\t3'\t"
		    "[5'\t3']\n");

    for(i=0;i<hits;++i)
    {
	ajListPop(l,(void **)&m);
	ajDebug("hit %d start:%d cut1:%d cut2:%d\n",
		i, m->start, m->cut1, m->cut2);

	hang1 = (ajint)m->cut1 - (ajint)m->start;
	hang2 = (ajint)m->cut2 - (ajint)m->start;

	if(!plasmid && (hang1>100 || hang2>100))
	{
	    embMatMatchDel(&m);
	    continue;
	}

	if(limit)
	{
	    value=ajTableFetchS(table,m->cod);
	    if(value)
		ajStrAssignS(&m->cod,value);
	}

	if(m->cut2 >= m->cut1)
	    ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut1, m->cut2-1);
	else
	{
	    ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut2, m->cut1-1);
	    ajStrReverse(&overhead);
	}

	ajDebug("overhead:%S seqcmp:%S\n", overhead, seqcmp);

	/* Print out only those who have the same overhang. */
	if(ajStrMatchCaseS(overhead, seqcmp))
	{
	    if(html)
	    {
		ajFmtPrintF(outf,
			    "<tr><td>%-d</td><td>%-16s</td><td>%-16s"
			    "</td><td>%d</td><td>%d</td></tr>\n",
			    m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat),
			    m->cut1,m->cut2);
	    }
	    else
		ajFmtPrintF(outf,"\t%-d\t%-16s%-16s%d\t%d\t\n",
			    m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat),
			    m->cut1,m->cut2);
	}

	if(frags)
	    fa[fn++] = m->cut1;

	if(m->cut3 || m->cut4)
	{
	    if(m->cut4 >= m->cut3)
		ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq),
				m->cut3, m->cut4-1);
	    else
	    {
		ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq),
				m->cut4, m->cut3-1);
		ajStrReverse(&overhead);
	    }

	    if(ajStrMatchCaseS(overhead, seqcmp))
	    {
		if(html)
		    ajFmtPrintF(outf,
				"<tr><td>%-d</td><td>%-16s</td><td>%-16s"
				"</td><td></td><td></td><td>%d</td><td>%d"
				"</td></tr>\n",
				m->start,ajStrGetPtr(m->cod),
				ajStrGetPtr(m->pat),
				m->cut1,m->cut2);
		else
		    ajFmtPrintF(outf,"\t%-d\t%-16s%-16s\t\t%d\t%d\t\n",
				m->start,ajStrGetPtr(m->cod),
				ajStrGetPtr(m->pat),
				m->cut1,m->cut2);
	    }
	}

	/* I am not sure what fragments are doing so I left it in ...*/
	/* used in the report tail in restrict - restover does much the same */
	if(m->cut3 || m->cut4)
	{
	    if(frags)
		fa[fn++] = m->cut3;
	    /*	       ajFmtPrintF(*outf,"%d\t%d",m->cut3,m->cut4);*/
	}
	ajStrDel(&overhead);

	embMatMatchDel(&m);
    }



    if(frags)
    {
	ajSortIntInc(fa,fn);
	ajFmtPrintF(outf,"\n\nFragment lengths:\n");
	if(!fn || (fn==1 && plasmid))
	    ajFmtPrintF(outf,"    %d\n",end-begin+1);
	else
	{
	    last = -1;
	    fb = 0;
	    for(i=0;i<fn;++i)
	    {
		if((c=fa[i])!=last)
		    fa[fb++]=c;
		last = c;
	    }
	    fn = fb;
	    /* Calc lengths */

	    for(i=0;i<fn-1;++i)
		fx[fc++] = fa[i+1]-fa[i];
	    if(!plasmid)
	    {
		fx[fc++] = fa[0]-begin+1;
		fx[fc++] = end-fa[fn-1];
	    }
	    else
		fx[fc++] = (fa[0]-begin+1)+(end-fa[fn-1]);

	    ajSortIntDec(fx,fc);
	    for(i=0;i<fc;++i)
		ajFmtPrintF(outf,"    %d\n",fx[i]);
	}
	AJFREE(fa);
	AJFREE(fx);
    }


    ajStrDel(&ps);

    if(html)
	ajFmtPrintF(outf,"</table>\n");

    return;
}
Ejemplo n.º 10
0
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;
    AjPStr targetaln = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    const char   *queryseqc;
    const char   *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend   = 0;
    ajint targetend   = 0;
    ajint width  = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;
    ajint newmax = 0;

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* width for banded Smith-Waterman */
    minscore  = ajAcdGetFloat("minscore");

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    for(k=0;k<ntargetseqs;k++)
    {
	targetseq = ajSeqsetGetseqSeq(targetseqs, k);
	embWordGetTable(&kmers, targetseq);
	ajDebug("Number of distinct kmers found so far: %d\n",
		ajTableGetLength(kmers));
    }
    AJCNEW0(lastlocation, ntargetseqs);

    if(ajTableGetLength(kmers)<1)
	ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
	ajSeqTrim(queryseq);

	queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

	ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

	for(k=0;k<ntargetseqs;k++)
	{
	    lastlocation[k]=0;
	    matchlist[k] = ajListstrNew();
	}

	embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
		(const EmbPWordRK*)wordsw, wordlen, nkmers,
		matchlist, lastlocation, ajFalse);


	for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
	{
	    targetseq      = ajSeqsetGetseqSeq(targetseqs, k);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

	    if(ajListGetLength(matchlist[k])==0)
	    {
		ajFmtPrintF(errorf,
		            "No wordmatch start points for "
		            "%s vs %s. No alignment\n",
		            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
		embWordMatchListDelete(&matchlist[k]);
		continue;
	    }


	    /* only the maximum match is used as seed
	     * (if there is more than one location with the maximum match
	     * only the first one is used)
	     * TODO: we should add a new option to make above limit optional
	     */
	    maxmatch = embWordMatchFirstMax(matchlist[k]);

	    supermatcher_findendpoints(maxmatch,targetseq, queryseq,
		    &targetstart, &querystart,
		    &targetend, &queryend);

	    targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
	    queryseqc = ajSeqGetSeqC(queryseq);
	    targetseqc = ajSeqGetSeqC(targetseq);

	    ajStrAssignC(&queryaln,"");
	    ajStrAssignC(&targetaln,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
		    targetstart, querystart, targetend, queryend);

	    newmax = (targetend-targetstart+2)*width;

	    if(newmax > oldmax)
	    {
		AJCRESIZE0(path,oldmax,newmax);
		AJCRESIZE0(compass,oldmax,newmax);
		oldmax=newmax;
		ajDebug("++ memory re/allocation for path/compass arrays"
			" to size: %d\n", newmax);
	    }
	    else
	    {
		AJCSET0(path,newmax);
		AJCSET0(compass,newmax);
	    }

	    ajDebug("Calling embAlignPathCalcSWFast "
		    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
		    querystart, queryend, (queryend - querystart + 1),
		    ajSeqGetLen(queryseq),
		    targetstart, targetend, (targetend - targetstart + 1),
		    ajSeqGetLen(targetseq),
		    width);

	    score = embAlignPathCalcSWFast(&targetseqc[targetstart],
	                                   &queryseqc[querystart],
	                                   targetend-targetstart+1,
	                                   queryend-querystart+1,
	                                   0,width,
	                                   gapopen,gapextend,
	                                   path,sub,cvt,
	                                   compass,show);
	    if(score>minscore)
	    {
		embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
		                         targetseq,queryseq,
		                         &targetaln,&queryaln,
		                         targetend-targetstart+1,
		                         queryend-querystart+1,
		                         0,width,
		                         &targetstart,&querystart);

		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
		                    ajStrGetPtr(queryaln),
		                    ajSeqGetNameC(targetseq),
		                    ajSeqGetNameC(queryseq));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    ajDebug(" queryaln:%S \ntargetaln:%S\n",
		            queryaln,targetaln);
		    embAlignReportLocal(align,
			    queryseq, targetseq,
			    queryaln, targetaln,
			    querystart, targetstart,
			    gapopen, gapextend,
			    score, matrix,
			    1 + ajSeqGetOffset(queryseq),
			    1 + ajSeqGetOffset(targetseq)
		    );
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    }
	    ajStrDel(&targetaln);

	    embWordMatchListDelete(&matchlist[k]);
	}

	ajStrDel(&queryaln);
    }


    for(k=0;k<nkmers;k++)
    {
	AJFREE(wordsw[k]->seqindxs);
	AJFREE(wordsw[k]->nSeqMatches);

	for(j=0;j<wordsw[k]->nseqs;j++)
	    AJFREE(wordsw[k]->locs[j]);

	AJFREE(wordsw[k]->nnseqlocs);
	AJFREE(wordsw[k]->locs);
	AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    if(!ajAlignFormatShowsSequences(align))
	ajMatrixfDel(&matrix);
    
    AJFREE(path);
    AJFREE(compass);
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
Ejemplo n.º 11
0
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    char **seqs;
    const AjPSeq seq = NULL;
    ajint i=0;
    const AjPStr fmt=NULL;
    const char *p=NULL;
    char  c='\0';
    /*
    char *q=NULL;
    AjPSelexseq   sqdata=NULL;
    AjPSelexdata sdata=NULL;
    */
    ajint cnt=0;
    info->name = NULL;
    info->rf=NULL;
    info->cs=NULL;
    info->desc=NULL;
    info->acc=NULL;
    info->au=NULL;
    info->flags=0;

    AjPStr tmpstr = NULL;

    ajSeqsetFill(seqset);

    fmt = ajSeqsetGetFormat(seqset);
    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);

    maxlen = ajSeqsetGetLen(seqset);


    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa =NULL;
    }

    AJCNEW0(info->wgt,n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }
    info->nseq = n;
    info->alen = maxlen;

    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);
        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }
        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));

    /*
        info->rf = ajCharNewS(seq);

    	len = ajStrGetLen(seq->Selexdata->name);
    	info->name = ajCharNewRes(len+1);
    	strcpy(info->name,ajStrGetPtr(seq->Selexdata->name));
    	len = ajStrGetLen(seq->Selexdata->de);
    	info->desc = ajCharNewRes(len+1);

    	sdata = seq->Selexdata;
    	strcpy(info->desc,ajStrGetPtr(sdata->de));
    	len = ajStrGetLen(sdata->ac);
    	info->acc = ajCharNewRes(len+1);
    	strcpy(info->acc,ajStrGetPtr(sdata->ac));
    	len = ajStrGetLen(sdata->au);
    	info->au = ajCharNewRes(len+1);
    	strcpy(info->au,ajStrGetPtr(sdata->au));
    	if(sdata->tc[0] || sdata->tc[1])
    	{
    	    info->flags |= AINFO_TC;
    	    info->tc1 = sdata->tc[0];
    	    info->tc2 = sdata->tc[1];
    	}
    	if(sdata->nc[0] || sdata->nc[1])
    	{
    	    info->flags |= AINFO_NC;
    	    info->nc1 = sdata->nc[0];
    	    info->nc2 = sdata->nc[1];
    	}
    	if(sdata->ga[0] || sdata->ga[1])
    	{
    	    info->flags |= AINFO_GA;
    	    info->ga1 = sdata->ga[0];
    	    info->ga2 = sdata->ga[1];
    	}

    	for(i=0;i<n;++i)
    	{
    	    seq = ajSeqsetGetseqSeq(seqset,i);
    	    sqdata = seq->Selexdata->sq;
    	    if((len=ajStrGetLen(sqdata->name)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name));
    		else
    		    strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63);
    		info->sqinfo[i].name[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_NAME;
    	    }
    / *
    	    if((len=ajStrGetLen(sqdata->id)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id));
    		else
    		    strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63);
    		info->sqinfo[i].id[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ID;
    	    }
    * /

    	    strcpy(info->sqinfo[i].id,info->sqinfo[i].name);
    	    info->sqinfo[i].flags |= SQINFO_ID;
    	    if((len=ajStrGetLen(sqdata->ac)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac));
    		else
    		    strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63);
    		info->sqinfo[i].acc[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ACC;
    	    }
    	    if((len=ajStrGetLen(sqdata->de)))
    	    {
    		if(len<127)
    		    strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de));
    		else
    		    strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127);
    		info->sqinfo[i].desc[127]='\0';
    		info->sqinfo[i].flags |= SQINFO_DESC;
    	    }
    	    if(sqdata->start || sqdata->stop || sqdata ->len)
    	    {
    		info->sqinfo[i].start = sqdata->start;
    		info->sqinfo[i].stop  = sqdata->stop;
    		info->sqinfo[i].olen  = sqdata->len;
    		info->sqinfo[i].flags |= SQINFO_START;
    		info->sqinfo[i].flags |= SQINFO_STOP;
    		info->sqinfo[i].flags |= SQINFO_OLEN;
    	    }

    	    if(ajStrGetLen(seq->Selexdata->ss))
    	    {

    		info->sqinfo[i].ss = ajCharNewRes(maxlen+1);
    		p = ajStrGetPtr(seq->Selexdata->ss);
    		q = info->sqinfo[i].ss;
    		while((c==*p))
    		{
    		    if(c=='.' || c==' ' || c=='_' || c=='-')
    			*q++ = c;
    		    ++p;
    		}
    		*q = '\0';
    		info->sqinfo[i].flags |= SQINFO_SS;
    	    }
    	}
        }
    / *
        }
    */


    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);

        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }


    *retseqs = seqs;
    ajStrDel(&tmpstr);

    return;
}
Ejemplo n.º 12
0
int main(int argc, char **argv)
{
    AjPSeqset seqset;
    AjPSeqall seqall;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    ajint wordlen;
    AjPTable wordsTable = NULL;
    AjPList* matchlist = NULL;
    AjPFile logfile;
    AjPFeattable* seqsetftables = NULL;
    AjPFeattable seqallseqftable = NULL;
    AjPFeattabOut ftoutforseqsetseq = NULL;
    AjPFeattabOut ftoutforseqallseq = NULL;
    AjPAlign align = NULL;
    AjIList iter = NULL;
    ajint targetstart;
    ajint querystart;
    ajint len;
    ajuint i, j;
    ajulong nAllMatches = 0;
    ajulong sumAllScore = 0;
    AjBool dumpAlign = ajTrue;
    AjBool dumpFeature = ajTrue;
    AjBool checkmode = ajFalse;
    EmbPWordRK* wordsw = NULL;
    ajuint npatterns = 0;
    ajuint seqsetsize;
    ajuint nmatches;
    ajuint* nmatchesseqset;
    ajuint* lastlocation; /* Cursors for Rabin-Karp search. */
                          /* Shows until what point the query sequence was
                           *  scanned for a pattern sequences in the seqset.
                          */
    char* paddedheader = NULL;
    const char* header;
    AjPStr padding;

    header = "Pattern %S  #pat-sequences  #all-matches  avg-match-length\n";
    padding = ajStrNew();

    embInit("wordmatch", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seqset  = ajAcdGetSeqset("asequence");
    seqall  = ajAcdGetSeqall("bsequence");
    logfile = ajAcdGetOutfile("logfile");
    dumpAlign = ajAcdGetToggle("dumpalign");
    dumpFeature = ajAcdGetToggle("dumpfeat");

    if(dumpAlign)
    {
        align = ajAcdGetAlign("outfile");
        ajAlignSetExternal(align, ajTrue);
    }

    seqsetsize = ajSeqsetGetSize(seqset);
    ajSeqsetTrim(seqset);
    AJCNEW0(matchlist, seqsetsize);
    AJCNEW0(seqsetftables, seqsetsize);
    AJCNEW0(nmatchesseqset, seqsetsize);

    if (dumpFeature)
    {
        ftoutforseqsetseq =  ajAcdGetFeatout("aoutfeat");
        ftoutforseqallseq =  ajAcdGetFeatout("boutfeat");
    }

    checkmode = !dumpFeature && !dumpAlign;
    embWordLength(wordlen);

    ajFmtPrintF(logfile, "Small sequence/file for constructing"
	    " target patterns: %S\n", ajSeqsetGetUsa(seqset));
    ajFmtPrintF(logfile, "Large sequence/file to be scanned"
	    " for patterns: %S\n", ajSeqallGetUsa(seqall));
    ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n",
            seqsetsize);
    ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen);

    for(i=0;i<seqsetsize;i++)
    {
        targetseq = ajSeqsetGetseqSeq(seqset, i);
        embWordGetTable(&wordsTable, targetseq);
    }

    AJCNEW0(lastlocation, seqsetsize);

    if(ajTableGetLength(wordsTable)>0)
    {
        npatterns = embWordRabinKarpInit(wordsTable,
                                       &wordsw, wordlen, seqset);
        ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns);

        while(ajSeqallNext(seqall,&queryseq))
        {
            for(i=0;i<seqsetsize;i++)
            {
                lastlocation[i]=0;

                if (!checkmode)
                    matchlist[i] = ajListstrNew();
            }

            nmatches = embWordRabinKarpSearch(
                    ajSeqGetSeqS(queryseq), seqset,
                    (EmbPWordRK const *)wordsw, wordlen, npatterns,
                    matchlist, lastlocation, checkmode);
            nAllMatches += nmatches;

            if (checkmode)
        	continue;

            for(i=0;i<seqsetsize;i++)
            {
                if(ajListGetLength(matchlist[i])>0)
                {
                    iter = ajListIterNewread(matchlist[i]) ;

                    while(embWordMatchIter(iter, &targetstart, &querystart, &len,
                            &targetseq))
                    {
                        if(dumpAlign)
                        {
                            ajAlignDefineSS(align, targetseq, queryseq);
                            ajAlignSetScoreI(align, len);
                            /* ungapped alignment means same length
                             *  for both sequences
                            */
                            ajAlignSetSubRange(align, targetstart, 1, len,
                                    ajSeqIsReversed(targetseq),
                                    ajSeqGetLen(targetseq),
                                    querystart, 1, len,
                                    ajSeqIsReversed(queryseq),
                                    ajSeqGetLen(queryseq));
                        }
                    }

                    if(dumpAlign)
                    {
                	ajAlignWrite(align);
                	ajAlignReset(align);
                    }

                    if(ajListGetLength(matchlist[i])>0 && dumpFeature)
                    {
                        embWordMatchListConvToFeat(matchlist[i],
                                                   &seqsetftables[i],
                                                   &seqallseqftable,
                                                   targetseq, queryseq);
                        ajFeattableWrite(ftoutforseqallseq, seqallseqftable);
                        ajFeattableDel(&seqallseqftable);
                    }

                    ajListIterDel(&iter);
                }

                embWordMatchListDelete(&matchlist[i]);
            }
        }

        /* search completed, now report statistics */
        for(i=0;i<npatterns;i++)
        {
            sumAllScore += wordsw[i]->lenMatches;

            for(j=0;j<wordsw[i]->nseqs;j++)
        	nmatchesseqset[wordsw[i]->seqindxs[j]] +=
        		wordsw[i]->nSeqMatches[j];
        }

        ajFmtPrintF(logfile, "Number of sequences in the file scanned "
                "for patterns: %u\n", ajSeqallGetCount(seqall));
        ajFmtPrintF(logfile, "Number of all matches: %Lu"
                " (wordmatch finds exact matches only)\n", nAllMatches);

        if(nAllMatches>0)
        {
            ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore);
            ajFmtPrintF(logfile, "Average match length: %.2f\n",
        	    sumAllScore*1.0/nAllMatches);

            ajFmtPrintF(logfile, "\nDistribution of the matches among pattern"
        	    " sequences:\n");
            ajFmtPrintF(logfile, "-----------------------------------------"
        	    "-----------\n");

            for(i=0;i<ajSeqsetGetSize(seqset);i++)
            {
        	if (nmatchesseqset[i]>0)
        	    ajFmtPrintF(logfile, "%-42s: %8u\n",
        	                ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)),
        	                nmatchesseqset[i]);

        	ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]);
        	ajFeattableDel(&seqsetftables[i]);
            }

            ajFmtPrintF(logfile, "\nPattern statistics:\n");
            ajFmtPrintF(logfile, "-------------------\n");
            if(wordlen>7)
        	ajStrAppendCountK(&padding, ' ', wordlen-7);
            paddedheader = ajFmtString(header,padding);
            ajFmtPrintF(logfile, paddedheader);

            for(i=0;i<npatterns;i++)
        	if (wordsw[i]->nMatches>0)
        	    ajFmtPrintF(logfile, "%-7s: %12u  %12u %17.2f\n",
        	                wordsw[i]->word->fword, wordsw[i]->nseqs,
        	                wordsw[i]->nMatches,
        	                wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches);
        }

    }

    for(i=0;i<npatterns;i++)
    {
        for(j=0;j<wordsw[i]->nseqs;j++)
            AJFREE(wordsw[i]->locs[j]);

        AJFREE(wordsw[i]->locs);
        AJFREE(wordsw[i]->seqindxs);
        AJFREE(wordsw[i]->nnseqlocs);
        AJFREE(wordsw[i]->nSeqMatches);
        AJFREE(wordsw[i]);
    }

    embWordFreeTable(&wordsTable);

    AJFREE(wordsw);
    AJFREE(matchlist);
    AJFREE(lastlocation);
    AJFREE(nmatchesseqset);
    AJFREE(seqsetftables);

    if(dumpAlign)
    {
        ajAlignClose(align);
        ajAlignDel(&align);
    }

    if(dumpFeature)
    {
        ajFeattabOutDel(&ftoutforseqsetseq);
        ajFeattabOutDel(&ftoutforseqallseq);
    }

    ajFileClose(&logfile);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&queryseq);
    ajStrDel(&padding);
    AJFREE(paddedheader);

    embExit();

    return 0;
}
Ejemplo n.º 13
0
int main(int argc, char **argv) {
  // initialize EMBASSY info
  embInitPV("kcentroidfold", argc, argv, "KBWS", "1.0.9");

  // soap driver and parameter object
  struct soap soap;
  struct ns1__centroidfoldInputParams params;

  char* jobid;

  AjPSeqall seqall; // input sequence
  AjPFile   outf; // outfile
  AjPStr    goutfile; // graph file name
  AjPFile   goutf; // graph file handle

  AjPSeq     seq;
  AjPStr     inseq= NULL;

  AjPStr     substr;

  AjPStr     engine; // CONTRAfold, McCaskill, pfold or AUX
  ajint      gamma;

  // get input/output info
  seqall= ajAcdGetSeqall("seqall");
  outf= ajAcdGetOutfile("outfile");
  goutfile= ajAcdGetString("goutfile");

  // get parameters
  engine= ajAcdGetString("engine");
  gamma=  ajAcdGetInt("gamma");

  // set parameters
  params.model= ajCharNewS(engine);
  params.gamma= gamma;

  while (ajSeqallNext(seqall, &seq)) {
    // initialize
    soap_init(&soap);
    inseq= NULL;

    // convert sequence data to EMBOSS string as fasta format
    ajStrAppendC(&inseq, ">");
    ajStrAppendS(&inseq, ajSeqGetNameS(seq));
    ajStrAppendC(&inseq, "\n");
    ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

    // convert EMBOSS string to char* in C
    char* in0;
    in0= ajCharNewS(inseq);

    // submit query via SOAP and get job ID
    if (soap_call_ns1__runCentroidfold(&soap, NULL, NULL, in0, &params, &jobid) == SOAP_OK) {
    } else {
      soap_print_fault(&soap, stderr);
    }

    // polling
    int check = 0;
    while (check == 0) {
      if (soap_call_ns1__checkStatus(&soap, NULL, NULL, jobid, &check) == SOAP_OK) {
      } else {
	soap_print_fault(&soap, stderr);
      }
      sleep(3);
    }

    // get result (sequence alignment text data)
    char* result;
    if(soap_call_ns1__getMultiResult(&soap, NULL, NULL, jobid, "out", &result) == SOAP_OK) {
      // convert result from C char* to EMBOSS string object
      substr= ajStrNewC(result);

      // output result (EMBOSS string) to file or STDOUT via EMBOSS
      ajFmtPrintF(outf, "%S\n", substr);
    } else {
      soap_print_fault(&soap, stderr); 
    }

    // get result (image file)
    char* image_url;
    if(soap_call_ns1__getMultiResult(&soap, NULL, NULL, jobid, "png", &image_url) == SOAP_OK) {
      goutf= ajFileNewOutNameS(goutfile);
      
      if (!goutf) {
	// can not open image output file
	ajFmtError("Problem writing out image file");
	embExitBad();
      }

      if (!gHttpGetBinC(image_url, &goutf)) {
	// can not download image file
	ajFmtError("Problem downloading image file");
	embExitBad();
      }
    } else {
      soap_print_fault(&soap, stderr); 
    }

  }

  // destruct SOAP driver
  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

  // write output file and destruct outfile object
  ajFileClose(&outf);

  // destruct EMBOSS object
  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&substr);
  ajStrDel(&engine);

  // exit
  embExit();
    
  return 0;
}
Ejemplo n.º 14
0
int main(int argc, char *argv[])
{
  embInitPV("gaminoinfo", argc, argv, "GEMBASSY", "1.0.1");

  struct soap soap;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    inseq = NULL;
  AjPStr    seqid = NULL;

  char *in0;
  char *result;

  AjPFile outf = NULL;

  seqall = ajAcdGetSeqall("sequence");
  outf   = ajAcdGetOutfile("outfile");

  while(ajSeqallNext(seqall, &seq))
    {
      soap_init(&soap);

      inseq = NULL;

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      if(!ajStrGetLen(seqid))
         ajStrAssignS(&seqid, ajSeqGetNameS(seq));

      ajStrAppendC(&inseq, ">");
      ajStrAppendS(&inseq, ajSeqGetNameS(seq));
      ajStrAppendC(&inseq, "\n");
      ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

      in0 = ajCharNewS(inseq);

      if(soap_call_ns1__amino_USCOREinfo(
	                                &soap,
					 NULL,
					 NULL,
					 in0,
					&result
                                        ) == SOAP_OK)
	{
          ajFmtPrintF(outf, "Sequence: %S\n%s\n", seqid, result);
	}
      else
	{
	  soap_print_fault(&soap, stderr);
	}

      soap_destroy(&soap);
      soap_end(&soap);
      soap_done(&soap);

      AJFREE(in0);

      ajStrDel(&inseq);
    }

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);

  embExit();

  return 0;
}
Ejemplo n.º 15
0
static void primersearch_scan_seq(const Primer primdata,
			     const AjPSeq seq, AjBool reverse)
{
    AjPStr seqstr = NULL;
    AjPStr revstr = NULL;
    AjPStr seqname = NULL;
    ajuint fhits = 0;
    ajuint rhits = 0;
    AjPList fhits_list = NULL;
    AjPList rhits_list = NULL;

    /* initialise variables for search */
    ajStrAssignC(&seqname,ajSeqGetNameC(seq));
    ajStrAssignS(&seqstr, ajSeqGetSeqS(seq));
    ajStrAssignS(&revstr, ajSeqGetSeqS(seq));
    ajStrFmtUpper(&seqstr);
    ajStrFmtUpper(&revstr);
    ajSeqstrReverse(&revstr);
    fhits_list = ajListNew();
    rhits_list = ajListNew();

    if(!reverse)
    {
	/* test OligoA against forward sequence, and OligoB against reverse */
	embPatFuzzSearch(primdata->forward->type,
			 ajSeqGetBegin(seq),
			 primdata->forward->patstr,
			 seqname,
			 seqstr,
			 fhits_list,
			 primdata->forward->len,
			 primdata->forward->mm,
			 primdata->forward->amino,
			 primdata->forward->carboxyl,
			 primdata->forward->buf,
			 primdata->forward->off,
			 primdata->forward->sotable,
			 primdata->forward->solimit,
			 primdata->forward->re,
			 primdata->forward->skipm,
			 &fhits,
			 primdata->forward->real_len,
			 &(primdata->forward->tidy));

	if(fhits)
	    embPatFuzzSearch(primdata->reverse->type,
			     ajSeqGetBegin(seq),
			     primdata->reverse->patstr,
			     seqname,
			     revstr,
			     rhits_list,
			     primdata->reverse->len,
			     primdata->reverse->mm,
			     primdata->reverse->amino,
			     primdata->reverse->carboxyl,
			     primdata->reverse->buf,
			     primdata->reverse->off,
			     primdata->reverse->sotable,
			     primdata->reverse->solimit,
			     primdata->reverse->re,
			     primdata->reverse->skipm,
			     &rhits,
			     primdata->reverse->real_len,
			     &(primdata->reverse->tidy));
    }
    else
    {
	/*test OligoB against forward sequence, and OligoA against reverse  */
	embPatFuzzSearch(primdata->reverse->type,
			 ajSeqGetBegin(seq),
			 primdata->reverse->patstr,
			 seqname,
			 seqstr,
			 fhits_list,
			 primdata->reverse->len,
			 primdata->reverse->mm,
			 primdata->reverse->amino,
			 primdata->reverse->carboxyl,
			 primdata->reverse->buf,
			 primdata->reverse->off,
			 primdata->reverse->sotable,
			 primdata->reverse->solimit,
			 primdata->reverse->re,
			 primdata->reverse->skipm,
			 &fhits,
			 primdata->reverse->real_len,
			 &(primdata->reverse->tidy));

	if(fhits)
	    embPatFuzzSearch(primdata->forward->type,
			     ajSeqGetBegin(seq),
			     primdata->forward->patstr,
			     seqname,
			     revstr,
			     rhits_list,
			     primdata->forward->len,
			     primdata->forward->mm,
			     primdata->forward->amino,
			     primdata->forward->carboxyl,
			     primdata->forward->buf,
			     primdata->forward->off,
			     primdata->forward->sotable,
			     primdata->forward->solimit,
			     primdata->forward->re,
			     primdata->forward->skipm,
			     &rhits,
			     primdata->forward->real_len,
			     &(primdata->forward->tidy));
    }

    if(fhits && rhits)
	/* get amplimer length(s) and write out the hit */
	primersearch_store_hits(primdata, fhits_list, rhits_list,
				seq, reverse);

    /* tidy up */
    primersearch_clean_hitlist(&fhits_list);
    primersearch_clean_hitlist(&rhits_list);

    ajStrDel(&seqstr);
    ajStrDel(&revstr);
    ajStrDel(&seqname);

    return;
}
Ejemplo n.º 16
0
int main(int argc, char *argv[])
{
  embInitPV("gpalindrome", argc, argv, "GEMBASSY", "1.0.3");

  struct soap soap;
  struct ns1__palindromeInputParams params;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    inseq = NULL;
  AjPStr    seqid = NULL;
  ajint	    shortest = 0;
  ajint	    loop = 0;
  AjBool    gtmatch = 0;

  char *in0;
  char *result;

  AjPFile outf = NULL;

  seqall   = ajAcdGetSeqall("sequence");
  shortest = ajAcdGetInt("shortest");
  loop     = ajAcdGetInt("loop");
  gtmatch  = ajAcdGetBoolean("gtmatch");
  outf     = ajAcdGetOutfile("outfile");

  params.shortest = shortest;
  params.loop     = loop;
  params.gtmatch  = gtmatch;
  params.output   = "f";

  while(ajSeqallNext(seqall, &seq))
    {
      soap_init(&soap);

      inseq = NULL;

      ajStrAppendC(&inseq, ">");
      ajStrAppendS(&inseq, ajSeqGetNameS(seq));
      ajStrAppendC(&inseq, "\n");
      ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      in0 = ajCharNewS(inseq);

      if(soap_call_ns1__palindrome(
				  &soap,
                                   NULL,
                                   NULL,
				   in0,
                                  &params,
                                  &result
				  ) == SOAP_OK)
        {
          ajFmtPrintF(outf, "Sequence: %S\n", seqid);

          if(!gFileOutURLC(result, &outf))
            {
              ajDie("File downloading error from:\n%s\n", result);
              embExitBad();
            }
        }
      else
        {
          soap_print_fault(&soap, stderr);
        }

      soap_destroy(&soap);
      soap_end(&soap);
      soap_done(&soap);

      AJFREE(in0);

      ajStrDel(&inseq);
    }

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&seqid);

  embExit();

  return 0;
}
Ejemplo n.º 17
0
int main(int argc, char **argv)
{

    AjPSeqall seqall;
    AjPFile primfile;
    AjPStr rdline = NULL;

    Primer primdata;
    AjPStrTok handle = NULL;

    AjPList primList = NULL;

    embInit("stssearch", argc, argv);

    primfile = ajAcdGetInfile("infile");
    out      = ajAcdGetOutfile("outfile");
    seqall   = ajAcdGetSeqall("seqall");

    while(ajReadlineTrim(primfile, &rdline))
    {
	if(ajStrGetCharFirst(rdline) == '#')
	    continue;
	if(ajStrSuffixC(rdline, ".."))
	    continue;

	AJNEW(primdata);
	primdata->Name   = NULL;
	primdata->Oligoa = NULL;
	primdata->Oligob = NULL;

	handle = ajStrTokenNewC(rdline, " \t");
	ajStrTokenNextParse(&handle, &primdata->Name);

	if(!(nprimers % 1000))
	    ajDebug("Name [%d]: '%S'\n", nprimers, primdata->Name);

	ajStrTokenNextParse(&handle, &primdata->Oligoa);
	ajStrFmtUpper(&primdata->Oligoa);
	primdata->Prima = ajRegComp(primdata->Oligoa);

	ajStrTokenNextParse(&handle, &primdata->Oligob);
	ajStrFmtUpper(&primdata->Oligob);
	primdata->Primb = ajRegComp(primdata->Oligob);
	ajStrTokenDel(&handle);

	if(!nprimers)
	    primList = ajListNew();

	ajListPushAppend(primList, primdata);
	nprimers++;
    }

    if(!nprimers)
	ajFatal("No primers read\n");

    ajDebug("%d primers read\n", nprimers);

    while(ajSeqallNext(seqall, &seq))
    {
	ajSeqFmtUpper(seq);
	ajStrAssignS(&seqstr, ajSeqGetSeqS(seq));
	ajStrAssignS(&revstr, ajSeqGetSeqS(seq));
	ajSeqstrReverse(&revstr);
	ajDebug("Testing: %s\n", ajSeqGetNameC(seq));
	ntests = 0;
	ajListMap(primList, stssearch_primTest, NULL);
    }

    ajFileClose(&out);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajFileClose(&out);
    ajStrDel(&revstr);
    ajStrDel(&seqstr);
    ajFileClose(&primfile);
    ajListMap(primList, stssearch_primDel, NULL);
    ajListFree(&primList);
    ajStrDel(&rdline);


    embExit();

    return 0;
}
Ejemplo n.º 18
0
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
Ejemplo n.º 19
0
int main(int argc, char *argv[])
{
  embInitPV("gcgr", argc, argv, "GEMBASSY", "1.0.1");

  struct soap soap;
  struct ns1__cgrInputParams params;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    inseq    = NULL;
  AjPStr    seqid    = NULL;
  ajint	    width    = 0;
  AjPFile   outf     = NULL;
  AjPStr    filename = NULL;
  AjPStr    outfname = NULL;
  AjPStr    format   = NULL;

  ajint i;

  char *in0;
  char *result;

  seqall   = ajAcdGetSeqall("sequence");
  width    = ajAcdGetInt("width");
  filename = ajAcdGetString("goutfile");
  format   = ajAcdGetString("format");

  params.width = width;

  i = 0;

  while(ajSeqallNext(seqall, &seq))
    {
      soap_init(&soap);

      inseq = NULL;

      ajStrAppendC(&inseq, ">");
      ajStrAppendS(&inseq, ajSeqGetAccS(seq));
      ajStrAppendC(&inseq, "\n");
      ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      in0 = ajCharNewS(inseq);

      if(soap_call_ns1__cgr(
                           &soap,
			    NULL,
			    NULL,
			    in0,
			   &params,
			   &result
                           ) == SOAP_OK)
	{
          ++i;

	  outfname = ajStrNewS(ajFmtStr("%S.%d.%S",
                                        filename,
                                        i,
                                        format));

	  outf = ajFileNewOutNameS(outfname);

          if(!outf)
            {
              ajDie("File open error\n");
            }

          if(!ajStrMatchC(format, "png"))
            {
              if(!gHttpConvertC(result, &outf, ajStrNewC("png"), format))
                {
                  ajDie("File downloading error from:\n%s\n", result);
                }
              else
                {
                  ajFmtPrint("Created %S\n", outfname);
                }
            }
          else
            {
              if(!gHttpGetBinC(result, &outf))
                {
                  ajDie("File downloading error from:\n%s\n", result);
                }
              else
                {
                  ajFmtPrint("Created %S\n", outfname);
                }
            }

	  ajStrDel(&outfname);
	}
      else
	{
	  soap_print_fault(&soap, stderr);
	}

      soap_destroy(&soap);
      soap_end(&soap);
      soap_done(&soap);

      AJFREE(in0);

      ajStrDel(&inseq);
    }

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);

  ajStrDel(&filename);

  embExit();
}
Ejemplo n.º 20
0
static void extractfeat_GetRegionPad(const AjPSeq seq, AjPStr *featstr,
				     ajint start, ajint end, AjBool sense,
				     AjBool beginning)
{
    ajint tmp;
    ajint pad;

    AjPStr result;

    ajDebug("In extractfeat_GetRegionPad start=%d, end=%d\n", start, end);

    result = ajStrNew();


    if(start > end)
    	return;

    if(start < 0)
    {
        pad = -start;
        if(ajSeqIsNuc(seq))
            ajStrAppendCountK(&result, 'N', pad);
        else
            ajStrAppendCountK(&result, 'X', pad);
        start = 0;
    }

    if(end > (ajint) ajSeqGetLen(seq)-1)
    	tmp = ajSeqGetLen(seq)-1;
    else
    	tmp = end;

    if(start <= (ajint) ajSeqGetLen(seq) && tmp >= 0)
    {
        ajDebug("Get subsequence %d-%d\n", start, tmp);
        ajStrAppendSubS(&result, ajSeqGetSeqS(seq), start, tmp);
        ajDebug("result=%S\n", result);
    }

    if(end > (ajint) ajSeqGetLen(seq)-1)
    {
        pad = end - ajSeqGetLen(seq)+1;
        if(ajSeqIsNuc(seq))
            ajStrAppendCountK(&result, 'N', pad);
        else
            ajStrAppendCountK(&result, 'X', pad);
        ajDebug("result=%S\n", result);
    }


    /* if feature was in reverse sense, then get reverse complement */
    if(!sense)
    {
	ajDebug("get reverse sense of subsequence\n");
    	ajSeqstrReverse(&result);
	ajDebug("result=%S\n", result);
    }

    if(beginning)
    {
	ajDebug("Prepend to featstr: %S\n", result);
        ajStrInsertS(featstr, 0, result);
    }
    else
    {
	ajDebug("Append to featstr: %S\n", result);
    	ajStrAppendS(featstr, result);
    }
    ajDebug("featstr=%S\n", *featstr);


    ajStrDel(&result);

    return;
}
Ejemplo n.º 21
0
int main(int argc, char **argv)
{
    AjPSeqall	seqall;
    AjPSeqout	seqout;
    AjPSeq seq = NULL;
    AjPStr str = NULL;
    AjPStr desc = NULL;
    ajint  tail3;
    ajint  tail5 = 0;
    ajint  minlength;
    ajint  mismatches;
    AjBool reverse;
    AjBool fiveprime;
    AjBool cvttolower;

    embInit("trimest", argc, argv);

    seqall 	= ajAcdGetSeqall("sequence");
    seqout 	= ajAcdGetSeqoutall("outseq");
    minlength 	= ajAcdGetInt("minlength");
    mismatches 	= ajAcdGetInt("mismatches");
    reverse	= ajAcdGetBoolean("reverse");
    fiveprime	= ajAcdGetBoolean("fiveprime");
    cvttolower  = ajAcdGetToggle("tolower");
    

    str = ajStrNew();

    while(ajSeqallNext(seqall, &seq))
    {
        /* get sequence description */
        ajStrAssignS(&desc, ajSeqGetDescS(seq));

        /* get positions to cut in 5' poly-T and 3' poly-A tails */
	if(fiveprime)
	    tail5 = trimest_get_tail(seq, 5, minlength, mismatches);
	tail3 = trimest_get_tail(seq, 3, minlength, mismatches);

	/* get a COPY of the sequence string */
	ajStrAssignS(&str, ajSeqGetSeqS(seq));

        /* cut off longest of 3' or 5' tail */
	if(tail5 > tail3)
	{
	    /* if 5' poly-T tail, then reverse the sequence */
	    ajDebug("Tail=%d\n", tail5);
            if(cvttolower)
                trimest_tolower(&str, 0, tail5-1);
            else
	        ajStrKeepRange(&str, tail5, ajSeqGetLen(seq)-1);
	    ajStrAppendC(&desc, " [poly-T tail removed]");

	}
	else if(tail3 > tail5)
	{
	    /* remove 3' poly-A tail */
	    ajDebug("Tail=%d\n", tail3);
            if(cvttolower)
                trimest_tolower(&str, ajSeqGetLen(seq)-tail3,
				ajSeqGetLen(seq));
            else
	        ajStrKeepRange(&str, 0, ajSeqGetLen(seq)-tail3-1);
            ajStrAppendC(&desc, " [poly-A tail removed]");

	}

        /* write sequence out */
	ajSeqAssignSeqS(seq, str);

	/* reverse complement if poly-T found */
	if(tail5 > tail3 && reverse)
	{
	    ajSeqReverseForce(seq);
	    ajStrAppendC(&desc, " [reverse complement]");
	}

        /* set description */
        ajSeqAssignDescS(seq, desc);

	ajSeqoutWriteSeq(seqout, seq);
    }

    ajSeqoutClose(seqout);

    ajStrDel(&str);
    ajStrDel(&desc);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);

    embExit();

    return 0;
}
Ejemplo n.º 22
0
int main(int argc, char **argv)
{
    AjPSeq seq;
    AjPGraph graph = 0;
    AjPFile   outf = NULL;
    AjPFile file   = NULL;
    AjPStr buffer  = NULL;
    float twist[4][4][4];
    float roll[4][4][4];
    float tilt[4][4][4];
    float rbend;
    float rcurve;
    float bendscale;
    float curvescale;
    float twistsum = (float) 0.0;
    float pi       = (float) 3.14159;
    float pifac    = (pi/(float) 180.0);
    float pi2      = pi/(float) 2.0;
    ajint *iseq    = NULL;
    float *x;
    float *y;
    float *xave;
    float *yave;
    float *curve;
    float *bend;
    const char *ptr;
    ajint i;
    ajint ii;
    ajint k;
    ajint j;
    char residue[2];
    float maxbend;
    float minbend;
    float bendfactor;
    float maxcurve;
    float mincurve;
    float curvefactor;

    float fxp;
    float fyp;
    float yincr;
    float yy1;
    ajint ixlen;
    ajint iylen;
    ajint ixoff;
    ajint iyoff;
    float ystart;
    float defheight;
    float currentheight;
    ajint count;
    ajint portrait = 0;
    ajint title    = 0;
    ajint numres;
    ajint ibeg;
    ajint iend;
    ajint ilen;
    AjPStr sstr = NULL;
    ajint ipos;
    float dx;
    float dy;
    float rxsum;
    float rysum;
    float yp1;
    float yp2;
    double td;
    
    embInit("banana", argc, argv);
    seq    = ajAcdGetSeq("sequence");
    file   = ajAcdGetDatafile("anglesfile");
    outf   = ajAcdGetOutfile("outfile");
    graph  = ajAcdGetGraph("graph");
    numres = ajAcdGetInt("residuesperline");

    ibeg = ajSeqGetBegin(seq);
    iend = ajSeqGetEnd(seq);

    ajStrAssignSubS(&sstr, ajSeqGetSeqS(seq), ibeg-1, iend-1);
    ilen = ajStrGetLen(sstr);

    AJCNEW0(iseq,ilen+1);
    AJCNEW0(x,ilen+1);
    AJCNEW0(y,ilen+1);
    AJCNEW0(xave,ilen+1);
    AJCNEW0(yave,ilen+1);
    AJCNEW0(curve,ilen+1);
    AJCNEW0(bend,ilen+1);

    ptr= ajStrGetPtr(sstr);

    for(ii=0;ii<ilen;ii++)
    {
	if(*ptr=='A' || *ptr=='a')
	    iseq[ii+1] = 0;
	else if(*ptr=='T' || *ptr=='t')
	    iseq[ii+1] = 1;
	else if(*ptr=='G' || *ptr=='g')
	    iseq[ii+1] = 2;
	else if(*ptr=='C' || *ptr=='c')
	    iseq[ii+1] = 3;
	else
	    ajErr("%c is not an ATCG hence not valid",*ptr);

	ptr++;
    }


    if(!file)
	ajErr("Banana failed to open angle file");

    ajReadline(file,&buffer);		/* 3 junk lines */
    ajReadline(file,&buffer);
    ajReadline(file,&buffer);

    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	{
	    if(ajReadline(file,&buffer))
	    {
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",
		       &twist[ii][0][k],&twist[ii][1][k],&twist[ii][2][k],
		       &twist[ii][3][k]);
	    }
	    else
		ajErr("Error reading angle file");

	    for(j=0;j<4;j++)
		twist[ii][j][k] *= pifac;
	}


    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	    if(ajReadline(file,&buffer))
	    {
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&roll[ii][0][k],
		       &roll[ii][1][k],&roll[ii][2][k],&roll[ii][3][k]);
	    }
	    else
		ajErr("Error reading angle file");


    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	    if(ajReadline(file,&buffer))
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&tilt[ii][0][k],
		       &tilt[ii][1][k],&tilt[ii][2][k],&tilt[ii][3][k]);
	    else
		ajErr("Error reading angle file");


    if(ajReadline(file,&buffer))
	sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&rbend,&rcurve,
	       &bendscale,&curvescale);
    else
	ajErr("Error reading angle file");

    ajFileClose(&file);
    ajStrDel(&buffer);


    for(ii=1;ii<ilen-1;ii++)
    {
	twistsum += twist[iseq[ii]][iseq[ii+1]][iseq[ii+2]];
	dx = (roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum)) +
	    (tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum-pi2));
	dy = roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum) +
	    tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum-pi2);

	x[ii+1] = x[ii]+dx;
	y[ii+1] = y[ii]+dy;

    }

    for(ii=6;ii<ilen-6;ii++)
    {
	rxsum = 0.0;
	rysum = 0.0;
	for(k=-4;k<=4;k++)
	{
	    rxsum+=x[ii+k];
	    rysum+=y[ii+k];
	}
	rxsum+=(x[ii+5]*(float)0.5);
	rysum+=(y[ii+5]*(float)0.5);
	rxsum+=(x[ii-5]*(float)0.5);
	rysum+=(y[ii-5]*(float)0.5);

	xave[ii] = rxsum*(float)0.1;
	yave[ii] = rysum*(float)0.1;
    }

    for(i=(ajint)rbend+1;i<=ilen-(ajint)rbend-1;i++)
    {
	td = sqrt(((x[i+(ajint)rbend]-x[i-(ajint)rbend])*
		   (x[i+(ajint)rbend]-x[i-(ajint)rbend])) +
		  ((y[i+(ajint)rbend]-y[i-(ajint)rbend])*
		   (y[i+(ajint)rbend]-y[i-(ajint)rbend])));
	bend[i] = (float) td;
	bend[i]*=bendscale;
    }

    for(i=(ajint)rcurve+6;i<=ilen-(ajint)rcurve-6;i++)
    {
	td = sqrt(((xave[i+(ajint)rcurve]-
		    xave[i-(ajint)rcurve])*(xave[i+(ajint)rcurve]-
					    xave[i-(ajint)rcurve]))+
		  ((yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])*
		   (yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])));
	curve[i] = (float) td;
    }
    

    if(outf)
    {
	ajFmtPrintF(outf,"Base   Bend      Curve\n");
	ptr = ajStrGetPtr(sstr);
	for(ii=1;ii<=ilen;ii++)
	{
	    ajFmtPrintF(outf,"%c    %6.1f   %6.1f\n",
			*ptr, bend[ii], curve[ii]);
	    ptr++;
	}
	ajFileClose(&outf);
    }

    if(graph)
    {
	maxbend  = minbend  = 0.0;
	maxcurve = mincurve = 0.0;
	for(ii=1;ii<=ilen;ii++)
	{
	    if(bend[ii] > maxbend)
		maxbend = bend[ii];
	    if(bend[ii] < minbend)
		minbend = bend[ii];
	    if(curve[ii] > maxcurve)
		maxcurve = curve[ii];
	    if(curve[ii] < mincurve)
		mincurve = curve[ii];
	}

	ystart = 75.0;

	ajGraphAppendTitleS(graph, ajSeqGetUsaS(seq));

        ajGraphicsSetPagesize(960, 768);

	ajGraphOpenWin(graph,(float)-1.0, (float)numres+(float)10.0,
		       (float)0.0, ystart+(float)5.0);

	ajGraphicsGetParamsPage(&fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff);

	if(portrait)
        {
            ixlen = 768;
            iylen = 960;
        }
        else
        {
            ixlen = 960;
            iylen = 768;
        }

	ajGraphicsGetCharsize(&defheight,&currentheight);
	if(!currentheight)
	{
	    defheight = currentheight = (float) 4.440072;
	    currentheight = defheight *
		((float)ixlen/ ((float)(numres)*(currentheight+(float)1.0)))
		    /currentheight;
	}
	ajGraphicsSetCharscale(((float)ixlen/((float)(numres)*
					  (currentheight+(float)1.0)))/
			    currentheight);
	ajGraphicsGetCharsize(&defheight,&currentheight);

	yincr = (currentheight + (float)3.0)*(float)0.3;

	if(!title)
	    yy1 = ystart;
	else
	    yy1 = ystart-(float)5.0;

	count = 1;

	residue[1]='\0';

	bendfactor = (3*yincr)/maxbend;
	curvefactor = (3*yincr)/maxcurve;

	ptr = ajStrGetPtr(sstr);

	yy1 = yy1-(yincr*((float)5.0));
	for(ii=1;ii<=ilen;ii++)
	{
	    if(count > numres)
	    {
		yy1 = yy1-(yincr*((float)5.0));
		if(yy1<1.0)
		{
		    if(!title)
			yy1=ystart;
		    else
			yy1 = ystart-(float)5.0;

		    yy1 = yy1-(yincr*((float)5.0));
		    ajGraphNewpage(graph,AJFALSE);
		}
		count = 1;
	    }
	    residue[0] = *ptr;

	    ajGraphicsDrawposTextAtend((float)(count)+(float)2.0,yy1,residue);

	    if(ii>1 && ii < ilen)
	    {
		yp1 = yy1+yincr + (bend[ii]*bendfactor);
		yp2 = yy1+yincr + (bend[ii+1]*bendfactor);
		ajGraphicsDrawposLine((float)count+(float)1.5,yp1,
			    (float)(count)+(float)2.5,yp2);
	    }

	    ipos = ilen-(ajint)rcurve-7;
	    if(ipos < 0)
		ipos = 0;

	    if(ii>rcurve+5 && ii<ipos)
	    {
		yp1 = yy1+yincr + (curve[ii]*curvefactor);
		yp2 = yy1+yincr + (curve[ii+1]*curvefactor);
		ajGraphicsDrawposLine((float)count+(float)1.7,yp1,
			    (float)(count)+(float)2.3,yp2);
	    }

	    ajGraphicsDrawposLine((float)count+(float)1.5,yy1+yincr,
			(float)(count)+(float)2.5,yy1+yincr);

	    count++;
	    ptr++;
	}

	ajGraphicsClose();
    }

    AJFREE(iseq);
    AJFREE(x);
    AJFREE(y);
    AJFREE(xave);
    AJFREE(yave);
    AJFREE(curve);
    AJFREE(bend);

    ajStrDel(&sstr);

    ajSeqDel(&seq);
    ajFileClose(&file);
    ajFileClose(&outf);
    ajGraphxyDel(&graph);

    embExit();

    return 0;
}
Ejemplo n.º 23
0
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp       = ajStrNew();
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
	   pos, adj, reverse, isreversed);*/
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
                    pos, adj, len, off);*/
            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }
            
	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
Ejemplo n.º 24
0
int main(int argc, char *argv[])
{
  embInitPV("gseqinfo", argc, argv, "GEMBASSY", "1.0.1");

  struct soap soap;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    inseq  = NULL;
  AjPStr    seqid  = NULL;
  AjPStr    tmp    = NULL;
  AjPStr    parse  = NULL;
  AjPStr    numA   = NULL;
  AjPStr    numT   = NULL;
  AjPStr    numG   = NULL;
  AjPStr    numC   = NULL;
  AjPStrTok handle = NULL;

  ajint n;

  char *in0;
  char *result;

  AjBool  show = 0;
  AjPFile outf = NULL;

  seqall = ajAcdGetSeqall("sequence");

  outf = ajAcdGetOutfile("outfile");

  while(ajSeqallNext(seqall, &seq))
    {

      soap_init(&soap);

      inseq = NULL;

      ajStrAppendC(&inseq, ">");
      ajStrAppendS(&inseq, ajSeqGetNameS(seq));
      ajStrAppendC(&inseq, "\n");
      ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      in0 = ajCharNewS(inseq);

      if(soap_call_ns1__seqinfo(
			       &soap,
                                NULL,
                                NULL,
			        in0,
                               &result
			       ) == SOAP_OK)
        {
          tmp = ajStrNewC(result);

          ajStrExchangeCC(&tmp, "<", "\n");
          ajStrExchangeCC(&tmp, ">", "\n");

          handle = ajStrTokenNewC(tmp, "\n");

          while(ajStrTokenNextParse(handle, &parse))
            {
              if(ajStrIsInt(parse))
                if(!numA)
                  numA = ajStrNewS(parse);
                else if(!numT)
                  numT = ajStrNewS(parse);
                else if(!numG)
                  numG = ajStrNewS(parse);
                else if(!numC)
                  numC = ajStrNewS(parse);
            }
          if(show)
            ajFmtPrint("Sequence: %S A: %S T: %S G: %S C: %S\n",
                       seqid, numA, numT, numG, numC);
          else
            ajFmtPrintF(outf, "Sequence: %S A: %S T: %S G: %S C: %S\n",
                        seqid, numA, numT, numG, numC);
        }
      else
        {
          soap_print_fault(&soap, stderr);
        }

      soap_destroy(&soap);
      soap_end(&soap);
      soap_done(&soap);

      AJFREE(in0);

      ajStrDel(&inseq);
  }

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&seqid);

  embExit();

  return 0;
}
Ejemplo n.º 25
0
AjPSeqout get_orf(
  AjPSeqout seqout,
  AjPSeqall seqall,
  AjPStr tablestr,
  ajuint minsize,
  ajuint maxsize,
  AjPStr findstr,
  AjBool methionine,
  AjBool circular,
  AjBool reverse,
  ajint around
) {
  ajint table;
  ajint find;
  AjPSeq seq = NULL;
  AjPTrn trnTable;
  AjPStr sseq = NULL;    /* sequence string */

  /* ORF number to append to name of sequence to create unique name */
  ajint orf_no;


  AjBool sense;    /* ajTrue = forward sense */
  ajint len;

  /* initialise the translation table */
  ajStrToInt(tablestr, &table);
  trnTable = ajTrnNewI(table);

  /* what sort of ORF are we looking for */
  ajStrToInt(findstr, &find);

  /*
  ** get the minimum size converted to protein length if storing
  ** protein sequences
  */
  if (find == P_STOP2STOP || find == P_START2STOP || find == AROUND_START) {
    minsize /= 3;
    maxsize /= 3;
  }

  while (ajSeqallNext(seqall, &seq)) {
    orf_no = 1;           /* number of the next ORF */
    sense = ajTrue;           /* forward sense initially */

    /* get the length of the sequence */
    len = ajSeqGetLen(seq);

    /*
    ** if the sequence is circular, append it to itself to triple its
    **  length so can deal easily with wrapped ORFs, but don't update
    ** len
    */
    if (circular) {
      ajStrAssignS(&sseq, ajSeqGetSeqS(seq));
      ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
      ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
      ajSeqAssignSeqS(seq, sseq);
    }

    /* find the ORFs */
    getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
                    circular, find, &orf_no, methionine, around, &record);

    /* now reverse complement the sequence and do it again */
    if (reverse) {
      sense = ajFalse;
      ajSeqReverseForce(seq);
      getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
                      circular, find, &orf_no, methionine,
                      around);
    }
  }
  ajTrnDel(&trnTable);
  ajSeqDel(&seq);
  ajStrDel(&sseq);
}