Пример #1
0
/* @funcstatic seqwords_keysearch  ********************************************
**
** Search swissprot with terms structure and writes a hitlist structure
**
** @param [r] inf   [AjPFile]     File pointer to swissprot database
** @param [r] terms [AjPTerms]    Terms object pointer
** @param [w] hits  [EmbPHitlist*] Hitlist object pointer
**
** @return [AjBool] True on success
** @@
******************************************************************************/
static AjBool seqwords_keysearch(AjPFile inf, 
				 AjPTerms terms,
				 EmbPHitlist *hits)
{
    AjPStr   line           =NULL;	/* Line of text. */
    AjPStr   id             =NULL;	/* Line of text. */
    AjPStr   temp           =NULL;
    ajint    s              =0;         /* Temp. start of hit value. */
    ajint    e              =0;         /* Temp. end of hit value. */
    AjPInt   start          =NULL;      /* Array of start of hit(s). */
    AjPInt   end            =NULL;      /* Array of end of hit(s). */
    ajint    nhits          =0;         /* Number of hits. */
    ajint    x              =0;         
    AjBool   foundkw        =ajFalse;
    AjBool   foundft        =ajFalse;


    /* Check for valid args. */
    if(!inf)
	return ajFalse;
    

    

    /* Allocate strings and arrays. */
    line       = ajStrNew();
    id         = ajStrNew();
    start      = ajIntNew();
    end        = ajIntNew();



    /* Start of main loop. */
    while((ajReadlineTrim(inf,&line)))
    {
	/* Parse the AC line. */
	if(ajStrPrefixC(line,"AC"))
	{
	    /* Copy accesion number and remove the ';' from the end. */
	    ajFmtScanS(line, "%*s %S", &id);
	    ajStrExchangeCC(&id, ";", "\0");
	    
	    
	    /* Reset flags & no. hits. */
	    foundkw=ajFalse;
	    foundft=ajFalse;
	    nhits=0;
	}
	
	
	/* Search the description and keyword lines with search terms. */
	else if((ajStrPrefixC(line,"DE") || (ajStrPrefixC(line,"KW"))))
	{
	    /* 
	     ** Search terms have a leading and trailing space to prevent 
	     ** them being found as substrings within other words.  To 
	     ** catch cases where a DE or KW line begins with a search 
	     ** term, we must add a leading and trailing space to line.
	     ** We must first remove punctation from the line to be parsed.
	     */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    ajStrInsertC(&line, 0, " ");


	    for (x = 0; x < terms->N; x++)
		/* Search term is found. */
		if((ajStrFindCaseS(line, terms->Keywords[x])!=-1))
		{	
		    foundkw=ajTrue;
		    break;
		}
	}

	
	/* Search the feature table line with search terms. */
	else if((ajStrPrefixC(line,"FT   DOMAIN")))
	{
	    /*	
	     ** Search terms have a leading and trailing space to prevent 
	     ** them being found as substrings within other words.  To 
	     ** catch cases where a FT line ends with a search 
	     ** term, we must add a  trailing space to line 
	     ** We must first remove punctation from the line to be parsed.
	     */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    

	    for (x = 0; x < terms->N; x++)
		if((ajStrFindCaseS(line, terms->Keywords[x])!=-1))
		{
		    /* Search term is found. */
		    foundft = ajTrue;
		    nhits++;
		    
		    /* Assign start and end of hit. */
		    ajFmtScanS(line, "%*s %*s %d %d", &s, &e);


		    ajIntPut(&start, nhits-1, s);
		    ajIntPut(&end, nhits-1, e);
		    break;
		}
	}
	

	/* Parse the sequence. */
	else if((ajStrPrefixC(line,"SQ") && ((foundkw == ajTrue) ||
					     (foundft == ajTrue))))
	{
	    /* Allocate memory for temp. sequence. */
	    temp       = ajStrNew();


	    /* Read the sequence into hitlist structure. */
	    while((ajReadlineTrim(inf,&line)) && !ajStrPrefixC(line,"//"))
		/* Read sequence line into temp. */
		ajStrAppendC(&temp,ajStrGetPtr(line)+3);
 

	    /* Clean up temp. sequence. */
	    ajStrRemoveWhite(&temp);


	    /*Priority is given to domain (rather than full length) sequence.*/
	    if(foundft)
	    {
		for(x=0;x<nhits;x++)
		{
		    /* Increment counter of hits for subsequent hits*/
		    (*hits)->N++;

		    
		    /* Reallocate memory for array of hits in hitlist
                       structure. */
		    AJCRESIZE((*hits)->hits, (*hits)->N);
		    (*hits)->hits[(*hits)->N-1]=embHitNew();
		    ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model,
				 "KEYWORD");
		    

		    /* Assign start and end of hit. */
		    (*hits)->hits[(*hits)->N-1]->Start = ajIntGet(start, x);
		    (*hits)->hits[(*hits)->N-1]->End = ajIntGet(end, x);
				

		    /* Extract sequence within specified range */
		    ajStrAssignSubS(&(*hits)->hits[(*hits)->N - 1]->Seq, temp, 
				(*hits)->hits[(*hits)->N - 1]->Start - 1, 
				(*hits)->hits[(*hits)->N - 1]->End - 1);
		    

		    /* Put id into structure */
		    ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id);
		}
	    }
	    else
	    {
		/* Increment counter of hits */
		(*hits)->N++;

		    
		/* Reallocate memory for array of hits in hitlist structure */
		AJCRESIZE((*hits)->hits, (*hits)->N);
		(*hits)->hits[(*hits)->N-1]=embHitNew();
		ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model, "KEYWORD");

		/* Extract whole sequence */
		ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Seq, temp); 
		(*hits)->hits[(*hits)->N - 1]->Start = 1; 
		(*hits)->hits[(*hits)->N - 1]->End =
		    ajStrGetLen((*hits)->hits[(*hits)->N - 1]->Seq); 


		/* Put id into structure */
		ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id);
	    }

	    /* Free temp. sequence */
	    ajStrDel(&temp);
	}
    }


    /* Clean up */
    ajStrDel(&line);
    ajStrDel(&id);
    ajIntDel(&start);
    ajIntDel(&end);

    return ajTrue;
}
Пример #2
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq    = NULL;
    AjPFile outf  = NULL;
    AjPStr strand = NULL;
    AjPStr substr = NULL;
    AjPStr bases  = NULL;

    ajint begin;
    ajint end;
    ajint len;

    ajint minlen;
    float minobsexp;
    float minpc;

    ajint window;
    ajint shift;
    ajint plotstart;
    ajint plotend;

    float  *xypc   = NULL;
    float  *obsexp = NULL;
    AjBool *thresh = NULL;
    float  obsexpmax;

    ajint i;
    ajint maxarr;


    embInit("newcpgreport",argc,argv);

    seqall    = ajAcdGetSeqall("sequence");
    window    = ajAcdGetInt("window");
    shift     = ajAcdGetInt("shift");
    outf      = ajAcdGetOutfile("outfile");
    minobsexp = ajAcdGetFloat("minoe");
    minlen    = ajAcdGetInt("minlen");
    minpc     = ajAcdGetFloat("minpc");

    substr = ajStrNew();
    bases  = ajStrNewC("CG");
    maxarr = 0;

    while(ajSeqallNext(seqall, &seq))
    {
	begin = ajSeqallGetseqBegin(seqall);
	end   = ajSeqallGetseqEnd(seqall);
	strand = ajSeqGetSeqCopyS(seq);
	ajStrFmtUpper(&strand);

	ajStrAssignSubC(&substr,ajStrGetPtr(strand),--begin,--end);
	len=ajStrGetLen(substr);

	if(len > maxarr)
	{
	    AJCRESIZE(obsexp, len);
	    AJCRESIZE(thresh, len);
	    AJCRESIZE(xypc, len);
	    maxarr = len;
	}
	for(i=0;i<len;++i)
	    obsexp[i]=xypc[i]=0.0;


	newcpgreport_findbases(substr, len, window, shift, obsexp,
			       xypc, bases, &obsexpmax, &plotstart, &plotend);

	newcpgreport_identify(outf, obsexp, xypc, thresh, 0, len, shift,
			      ajStrGetPtr(bases), ajSeqGetNameC(seq), minlen,
			      minobsexp, minpc, ajStrGetPtr(strand));

	ajStrDel(&strand);
    }

    ajStrDel(&bases);

    ajSeqDel(&seq);
    ajStrDel(&substr);
    ajFileClose(&outf);

    AJFREE(obsexp);
    AJFREE(thresh);
    AJFREE(xypc);

    ajSeqallDel(&seqall);

    embExit();

    return 0;
}
Пример #3
0
int main(int argc, char **argv)
{
    AjPAlign align;
    AjPSeq a;
    AjPSeq b;
    AjPSeqout seqout;

    AjPStr m;
    AjPStr n;

    AjPStr merged = NULL;

    ajuint lena;
    ajuint lenb;

    const char   *p;
    const char   *q;

    ajint start1 = 0;
    ajint start2 = 0;

    float *path;
    ajint *compass;

    AjPMatrixf matrix;
    AjPSeqCvt  cvt = 0;
    float **sub;

    float gapopen;
    float gapextend;
    ajulong maxarr = 1000;
    ajulong len;		  /* arbitrary. realloc'd if needed */
    size_t  stlen;

    float score;
    ajint begina;
    ajint beginb;

    embInit("merger", argc, argv);

    a         = ajAcdGetSeq("asequence");
    b         = ajAcdGetSeq("bsequence");
    seqout    = ajAcdGetSeqout("outseq");
    matrix    = ajAcdGetMatrixf("datafile");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    align     = ajAcdGetAlign("outfile");

    gapopen = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    AJCNEW(path, maxarr);
    AJCNEW(compass, maxarr);

    /*
    **  make the two sequences lowercase so we can show which one we are
    **  using in the merge by uppercasing it
    */

    ajSeqFmtLower(a);
    ajSeqFmtLower(b);

    m = ajStrNew();
    n = ajStrNew();

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    begina = ajSeqGetBegin(a);
    beginb = ajSeqGetBegin(b);

    lena = ajSeqGetLen(a);
    lenb = ajSeqGetLen(b);

    if(lenb > (ULONG_MAX/(ajulong)(lena+1)))
	ajFatal("Sequences too big. Try 'supermatcher'");

    len  = lena*lenb;

    if(len>maxarr)
    {

	ajDebug("merger: resize path, len to %d (%d * $d)\n",
		len, lena, lenb);

	stlen = (size_t) len;
        AJCRESIZE(path,stlen);
        AJCRESIZE(compass,stlen);
        maxarr=len;
    }


    p = ajSeqGetSeqC(a);
    q = ajSeqGetSeqC(b);

    ajStrAssignC(&m,"");
    ajStrAssignC(&n,"");

    score = embAlignPathCalc(p,q,lena,lenb,gapopen,gapextend,path,sub,cvt,
		     compass, ajFalse);

    /*score = embAlignScoreNWMatrix(path,compass,gapopen,gapextend,
                                  a,b,lena,lenb,sub,cvt,
				  &start1,&start2);*/

    embAlignWalkNWMatrix(path,a,b,&m,&n,lena,lenb, &start1,&start2,gapopen,
			 gapextend,compass);

    /*
    ** now construct the merged sequence, uppercase the bits of the two
    ** input sequences which are used in the merger
    */
    merger_Merge(align, &merged,p,q,m,n,start1,start2,
		 ajSeqGetNameC(a),ajSeqGetNameC(b));

    embAlignReportGlobal(align, a, b, m, n,
			 start1, start2, gapopen, gapextend,
			 score, matrix, begina, beginb);

    ajAlignWrite(align);
    ajAlignReset(align);

    /* write the merged sequence */
    ajSeqAssignSeqS(a, merged);
    ajSeqoutWriteSeq(seqout, a);
    ajSeqoutClose(seqout);
    ajSeqoutDel(&seqout);

    ajSeqDel(&a);
    ajSeqDel(&b);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajStrDel(&merged);

    AJFREE(compass);
    AJFREE(path);

    ajStrDel(&n);
    ajStrDel(&m);

    embExit();

    return 0;
}
Пример #4
0
static AjBool  ssematch_NWScore(AjPScop temp_scop, 
				AjPSeq pseq, 
				ajint mode, 
				AjPMatrixf matrix,
				float gapopen, 
				float gapextend)


{
    ajint         start1  =0;	/* Start of seq 1, passed as arg but not used.*/
    ajint         start2  =0;	/* Start of seq 2, passed as arg but not used.*/
    ajint         maxarr  =300;	/* Initial size for matrix.                   */
    ajint         len;

    ajint        *compass;

    const char       *p;        /* Query sequence.                            */
    const char       *q;        /* Subject sequence from scop object.         */

    float     **sub;
    float       id       =0.;	/* Passed as arg but not used here.           */
    float       sim      =0.;	
    float       idx      =0.;	/* Passed as arg but not used here.           */
    float       simx     =0.;	/* Passed as arg but not used here.           */
    float      *path;

    AjPStr      pstr = NULL;	/*  m walk alignment for first sequence 
				    Passed as arg but not used here.          */
    AjPStr      qstr = NULL;	/*  n walk alignment for second sequence 
				    Passed as arg but not used here.          */

    AjPSeq      qseq = NULL;    /* Subject sequence.                          */

   
    ajint lenp;                 /* Length of query sequence.                  */
    ajint lenq;                 /* Length of subject sequence.                */
    

    AjPSeqCvt   cvt  = 0;
    AjBool      show = ajFalse; /*Passed as arg but not used here.            */






    AJCNEW(path, maxarr);
    AJCNEW(compass, maxarr);
    pstr = ajStrNew();    
    qstr = ajStrNew();    
    gapopen   = ajRoundFloat(gapopen,8);
    gapextend = ajRoundFloat(gapextend,8);
    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);


    
    
    /* Extract subject sequence from scop object, convert to 3 letter code. */
    if (mode == 0)
        qseq = ssematch_convertbases(temp_scop->Sse);
    else if (mode == 1)
        qseq = ssematch_convertbases(temp_scop->Sss);


    lenp = ajSeqGetLen(pseq); /* Length of query sequence.   */
    lenq = ajSeqGetLen(qseq); /* Length of subject sequence. */
   

   


    /* Start of main application loop */
    /* Intitialise variables for use by alignment functions*/	    
    len = (lenp * lenq);

    if(len>maxarr)
    {
	AJCRESIZE(path,len);
	AJCRESIZE(compass,len);
	maxarr=len;
    }

    p = ajSeqGetSeqC(pseq); 
    q = ajSeqGetSeqC(qseq); 

    ajStrAssignC(&pstr,"");
    ajStrAssignC(&qstr,"");


    /* Check that no sequence length is 0. */
    if((lenp == 0)||(lenq == 0))
    {
       	AJFREE(compass);
	AJFREE(path);
	ajStrDel(&pstr);
	ajStrDel(&qstr);
    }


    /* Call alignment functions. */
    embAlignPathCalc(p,q,lenp,lenq, gapopen,
		     gapextend,path,sub,cvt,compass,show);

    /*embAlignScoreNWMatrix(path,compass,gapopen,gapextend,
                          pseq, qseq,
			  lenp,lenq,sub,cvt,
			  &start1,&start2);*/

    embAlignWalkNWMatrix(path,pseq,qseq,&pstr,&qstr,
			 lenp,lenq,&start1,&start2,
                         gapopen,gapextend,compass);

    embAlignCalcSimilarity(pstr,qstr,sub,cvt,lenp,
			   lenq,&id,&sim,&idx, &simx);


    /* Assign score. */
    
    temp_scop->Score = sim;
    
  



    /* Tidy up */
    AJFREE(compass);
    AJFREE(path);
    ajStrDel(&pstr);
    ajStrDel(&qstr);
    ajSeqDel(&qseq); 

    
    /* Bye Bye */
    return ajTrue;
}    
Пример #5
0
/* @funcstatic newcoils_read_matrix *******************************************
**
** Reads the matrix and stores in a hept_pref structure
**
** @param [u] inf [AjPFile] matrix input file
** @return [struct hept_pref*] Matrix data for heptad preference
******************************************************************************/
static struct hept_pref* newcoils_read_matrix(AjPFile inf)
{
    ajint i;
    ajint j;
    ajint pt;
    ajint aa_len;
    ajint win;

    float m_g;
    float sd_g;
    float m_cc;
    float sd_cc;
    float sc;
    float hept[NCHEPTAD];

    AjPStr buff;
    const char   *pbuff;
    
    struct hept_pref *h;


    buff = ajStrNew();

    aa_len = strlen(NCAAs);

    AJNEW(h);
    AJCNEW(h->m,aa_len);

    for(i=0; i<aa_len; ++i)
    {
	AJCNEW(h->m[i],NCHEPTAD);
	for(j=0; j<NCHEPTAD; ++j)
	    h->m[i][j] = -1;
    }

    AJNEW(h->f);

    h->n = 0;
    h->smallest = 1.0;

    while(ajReadlineTrim(inf,&buff))
    {
	pbuff = ajStrGetPtr(buff);
	if(*pbuff != '%')
	{
	    if((strncmp(pbuff,"uw ",3)==0) || (strncmp(pbuff,"w ",2)==0))
	    {
		i = h->n;
		if(strncmp(pbuff,"uw ",3)==0)
		    h->f[i].w = 0;
		else
		    h->f[i].w = 1;

		ajFmtScanS(buff,"%*s %d %f %f %f %f %f",&win,&m_cc,
			   &sd_cc,&m_g,&sd_g,&sc);

		h->f[i].win   = win;
		h->f[i].m_cc  = (float)m_cc; 
		h->f[i].sd_cc = (float)sd_cc;
		h->f[i].m_g   = (float)m_g;
		h->f[i].sd_g  = (float)sd_g;
		h->f[i].sc    = (float)sc;
		h->n++;

		AJCRESIZE(h->f,(h->n)+1);
		
		if((h->n)>=9)
		    ajFatal("Too many window parms in matrix file\n");

	    }
	    else if(*pbuff>='A' && *pbuff<='Z')
	    {
		/* AA data */
		pt = (int)(pbuff[0]-'A');
		if(h->m[pt][0]==-1)
		{
		    ajFmtScanS(buff,"%*s%f%f%f%f%f%f%f",&hept[0],
			       &hept[1],&hept[2],&hept[3],&hept[4],
			       &hept[5],&hept[6]);

		    for(i=0; i<NCHEPTAD; ++i)
		    {
			h->m[pt][i] = (float)hept[i];
			if(h->m[pt][i]>0)
			{
			    if(h->m[pt][i]<h->smallest)
				h->smallest = h->m[pt][i];
			}
			else
			    h->m[pt][i]=-1; /* Don't permit zero values */
		    }

		}
		else
		    ajWarn("multiple entries for AA %c in matrix file\n",
			   *pbuff);
	    }
	    else
	    {
		ajWarn("strange characters in matrix file\n");
		ajWarn("Ignoring line: %S\n",buff);
	    }
	}
    }


    ajStrDel(&buff);

    return h;
}
Пример #6
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq   = NULL;
    AjPFile inf  = NULL;

    AjPStr strand = NULL;
    AjPStr substr = NULL;
    AjPStr name   = NULL;
    AjPStr mname  = NULL;
    AjPStr tname  = NULL;
    AjPStr pname  = NULL;
    AjPStr line   = NULL;
    AjPStr cons   = NULL;
    AjPStr m      = NULL;
    AjPStr n      = NULL;

    AjPAlign align= NULL; /* JISON, replaces AjPOutfile outf */
    
    ajint type;
    ajint begin;
    ajint end;
    ajulong len;
    ajint i;
    ajint j;

    float **fmatrix=NULL;

    ajint mlen;
    float maxfs;
    ajint thresh;

    float gapopen;
    float gapextend;
    float opencoeff;
    float extendcoeff;

    const char *p;

    ajulong maxarr = 1000;
    ajulong alen;
    float *path;
    ajint *compass;
    size_t stlen;

    embInit("prophet", argc, argv);

    seqall      = ajAcdGetSeqall("sequence");
    inf         = ajAcdGetInfile("infile");
    opencoeff   = ajAcdGetFloat("gapopen");
    extendcoeff = ajAcdGetFloat("gapextend");
    align       = ajAcdGetAlign("outfile");  /*JISON replacing outfile */

    opencoeff   = ajRoundFloat(opencoeff, 8);
    extendcoeff = ajRoundFloat(extendcoeff, 8);

    substr = ajStrNew();
    name   = ajStrNew();
    mname  = ajStrNew();
    tname  = ajStrNew();
    line   = ajStrNew();
    m      = ajStrNewC("");
    n      = ajStrNewC("");

    type = prophet_getType(inf,&tname);
    if(!type)
      ajFatal("Unrecognised profile/matrix file format");

    prophet_read_profile(inf,&pname,&mname,&mlen,&gapopen,&gapextend,&thresh,
			 &maxfs, &cons);
    ajAlignSetMatrixName(align, mname);
    AJCNEW(fmatrix, mlen);

    for(i=0;i<mlen;++i)
    {
	AJCNEW(fmatrix[i], AZ);
	if(!ajReadlineTrim(inf,&line))
	    ajFatal("Missing matrix line");
	p = ajStrGetPtr(line);
	p = ajSysFuncStrtok(p," \t");
	for(j=0;j<AZ;++j)
	{
	    sscanf(p,"%f",&fmatrix[i][j]);
	    p = ajSysFuncStrtok(NULL," \t");
	}
    }

    AJCNEW(path, maxarr);
    AJCNEW(compass, maxarr);

    while(ajSeqallNext(seqall, &seq))
    {
	begin = ajSeqallGetseqBegin(seqall);
	end   = ajSeqallGetseqEnd(seqall);

	ajStrAssignC(&name,ajSeqGetNameC(seq));
	strand = ajSeqGetSeqCopyS(seq);

	ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
	len = ajStrGetLen(substr);

	if(len > (ULONG_MAX/(ajulong)(mlen+1)))
	    ajFatal("Sequences too big. Try 'supermatcher'");

	alen = len*mlen;
	if(alen>maxarr)
	{
	    stlen = (size_t) alen;
	    AJCRESIZE(path,stlen);
	    AJCRESIZE(compass,stlen);
	    maxarr=alen;
	}

	ajStrAssignC(&m,"");
	ajStrAssignC(&n,"");

	/* JISON used to be
	prophet_scan_profile(substr,pname,name,mlen,fmatrix,
			     outf,cons,opencoeff,
			     extendcoeff,path,compass,&m,&n,len); */

	/* JISON new call and reset align */
	prophet_scan_profile(substr,name,pname,mlen,fmatrix,
			     align,cons,opencoeff,
			     extendcoeff,path,compass,&m,&n,(ajint)len); 
	ajAlignReset(align);
	
	ajStrDel(&strand);
    }

    for(i=0;i<mlen;++i)
	AJFREE (fmatrix[i]);
    AJFREE (fmatrix);

    AJFREE(path);
    AJFREE(compass);

    ajStrDel(&line);
    ajStrDel(&cons);
    ajStrDel(&name);
    ajStrDel(&pname);
    ajStrDel(&mname);
    ajStrDel(&tname);
    ajStrDel(&substr);
    ajStrDel(&m);
    ajStrDel(&n);
    ajSeqDel(&seq);
    ajFileClose(&inf);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&seqall);
    embExit();

    return 0;
}