Esempio n. 1
0
static void acdrelations_readdatfile
            (AjPFile inf, 
	     PEdam *P)
{
  /*
  ** Reads the data file open on 'inf' into the PEdam object that 'P'
  ** points to.
  **
  ** Every line that does not start with '#' is split on '|' into three
  ** fields: the ACD datatype, the relations: value, and a block of
  ** attribute:value strings.  The third field is split again on ';'.
  ** Whitespace is stripped from the datatype and from each individual
  ** attribute:value string.  One PEdamdat record is built per line; the
  ** records are finally converted into the (*P)->dat array, with the
  ** count stored in (*P)->n.
  */
  AjPStr        rdline    = NULL;   /* Current line of the file          */
  const AjPStr  attrblock = NULL;   /* Third '|' field (not owned)       */
  const AjPStr  attr      = NULL;   /* One ';' token (not owned)         */
  AjPStr        attrcopy  = NULL;   /* Owned copy of 'attr'              */
  AjPList       attrlist  = NULL;   /* Attribute strings for one line    */

  AjPStr        dtype     = NULL;   /* First '|' field                   */
  AjPStr        edamrel   = NULL;   /* Second '|' field                  */

  PEdamdat      entry     = NULL;   /* Record being built                */
  AjPList       entries   = NULL;   /* All records read so far           */

  if(!P)
    ajFatal("Null arg error 1 in acdrelations_readdatfile");
  if(!inf)
    ajFatal("Null arg error 3 in acdrelations_readdatfile");

  rdline  = ajStrNew();
  dtype   = ajStrNew();
  edamrel = ajStrNew();
  entries = ajListNew();

  while(ajReadline(inf, &rdline))
    {
      /* Comment lines carry no data */
      if(ajStrPrefixC(rdline, "#"))
        continue;

      /* The three '|'-delimited fields, in file order */
      ajStrAssignS(&dtype,   ajStrParseC(rdline, "|"));
      ajStrAssignS(&edamrel, ajStrParseC(NULL,   "|"));
      attrblock = ajStrParseC(NULL, "|");

      /* Split the attribute block on ';', keeping a stripped copy of
         each token */
      attrlist = ajListstrNew();

      for(attr = ajStrParseC(attrblock, ";");
          attr;
          attr = ajStrParseC(NULL, ";"))
        {
          attrcopy = ajStrNew();
          ajStrAssignS(&attrcopy, attr);
          ajStrRemoveWhite(&attrcopy);
          ajListstrPushAppend(attrlist, attrcopy);
        }

      ajStrRemoveWhite(&dtype);

      /* Fill in the record and queue it */
      entry = ajEdamdatNew();
      ajStrAssignS(&entry->acdtype, dtype);
      ajStrAssignS(&entry->edam, edamrel);
      entry->n = ajListstrToarray(attrlist, &entry->acdattr);
      ajListPushAppend(entries, entry);

      /* The strings are now referenced from entry->acdattr; only the
         list itself is released here (presumably ajListstrFree leaves
         the node data alone - confirm against the AJAX list API) */
      ajListstrFree(&attrlist);
    }

  /* Hand the collected records over to the PEdam object */
  (*P)->n = ajListToarray(entries, (void***) &((*P)->dat));

  ajStrDel(&rdline);
  ajStrDel(&dtype);
  ajStrDel(&edamrel);
  ajListFree(&entries);

  return;
}
Esempio n. 2
0
/* @funcstatic acdrelations_writerelations ************************************
**
** Writes relations: attribute for an ACD data definition
** The relations: values given in knowntypes.standard have highest precedence,
** then the values given in edamtoacd.dat
** Attribute values for a given datatype in edamtoacd.dat are in order of
** increasing precedence, i.e. the last line is highest and the relations:
** value will be used if all conditions are met.
**
** The function is fatal if any of outf, acdtype, strarr or P is NULL, if
** n is zero, or if no line in P matches acdtype.  T may be NULL, in which
** case the knowntypes.standard lookup is skipped.
**
** @param [u] outf    [AjPFile] ACD output file
** @param [r] acdtype [AjPStr ] ACD datatype, e.g. "align"
** @param [r] strarr  [AjPStr*] All ACD attribute lines (whitespace removed)
**                              for the current ACD data item (of type
**                              acdtype).  One line per array element.
** @param [r] n       [ajint]   Size of strarr
** @param [r] P       [PEdam]   edam object to read
** @param [r] T       [PKtype]  ktype object to read (may be NULL)
** @return [void] 
** @@
******************************************************************************/
static void acdrelations_writerelations
            (AjPFile outf, 
	     AjPStr  acdtype,
	     AjPStr *strarr, 
             ajint   n,
	     PEdam   P,
    	     PKtype  T)
{
    ajint  i         = 0;
    ajint  j         = 0;
    ajint  k         = 0;
    ajint  nmatch    = 0;
    AjPStr relations = NULL;
    AjBool done      = ajFalse;
    AjBool donetype  = ajFalse;
    AjPStr tmpstr    = NULL;
    
    
    if(!outf || !acdtype || !strarr || !n || !P)
        ajFatal("NULL args passed to acdrelations_writerelations");

    /* Memory allocation */
    relations = ajStrNew();
    tmpstr    = ajStrNew();

    
    /* Loop through all lines in edamtoacd.dat */
    for(i=0; i<P->n ;i++)
    {
        /* Only the first run of consecutive lines for this datatype is
           considered */
        if(!ajStrMatchS(acdtype, P->dat[i]->acdtype))
            continue;

        /* Copy first relations: string defined for this datatype (default) */
        ajStrAssignS(&relations, P->dat[i]->edam);
        done = ajTrue;

        /* Later lines of the same datatype override the default when all
           of their required attributes are present in strarr */
        for(i++; i<P->n; i++)
        {
            if(!ajStrMatchS(acdtype, P->dat[i]->acdtype))
                break;

            /* Count required attributes found among the attribute lines */
            for(nmatch=0, j=0; j<P->dat[i]->n; j++)
            {
                for(k=0; k<n; k++)
                    if(ajStrMatchS(P->dat[i]->acdattr[j], strarr[k]))
                    {
                        nmatch++;
                        break;
                    }
            }

            /* All attribute values match */
            if(nmatch == P->dat[i]->n)
                ajStrAssignS(&relations, P->dat[i]->edam);
            /* Should never happen */
            else if (nmatch > P->dat[i]->n)
                ajFatal("Terminal weirdness in acdrelations_writerelations");
        }

        break;
    }

    /* Check for match of knowntype: attribute against knowntypes.standard.
       These have higher precedence than the rules defined in edamtoacd.dat.
       Skipped entirely when no ktype object was supplied. */
    for(donetype=ajFalse, i=0; T && !donetype && i<n; i++)
    {
        if(!ajStrPrefixC(strarr[i], "knowntype:"))
            continue;

        for(j=0; j<T->n; j++)
        {
            /* No check is made on the "Type" column in knowntypes.standard
               as these are not proper ACD datatype names
               To check these add
               if(ajStrMatchS(acdtype, T->dat[j]->acdtype)) */
            ajFmtPrintS(&tmpstr, "knowntype:\"%S\"", T->dat[j]->ktype);

            if(ajStrMatchS(tmpstr, strarr[i]))
            {
                ajStrAssignS(&relations, T->dat[j]->edam);
                donetype = ajTrue;
                break;
            }
        }
    }
    
    
    if(!done)
        ajFatal("No matching datatype (%S) in acdrelations_writerelations", acdtype);
    
    
    /* Write relations: attribute line to file */
    ajFmtPrintF(outf, "    relations:%S\n", relations);


    /* Free memory */
    ajStrDel(&relations);
    ajStrDel(&tmpstr);
    
    return;
}
Esempio n. 3
0
/* @prog seqnr **************************************************************
**
** Removes redundancy from DHF files (domain hits files) or other files of 
** sequences.
**
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variable declarations */
    AjPList    in        = NULL;    /* Names of domain hits files (input).   */
    AjPStr     inname    = NULL;    /* Full name of the current DHF file.    */
    AjPFile    inf       = NULL;    /* Current DHF file.                     */
    EmbPHitlist infhits   = NULL;   /* Hitlist from DHF file                 */
    AjBool     dosing    = ajFalse; /* Filter using singlet sequences.       */
    AjPDir     singlets  = NULL;    /* Singlets (input).                     */
    AjBool     dosets    = ajFalse; /* Filter using sets of sequences.       */
    AjPDir     insets    = NULL;    /* Sets (input).                         */
    AjPStr     mode      = NULL;    /* Mode of operation                     */
    ajint      moden     = 0;       /* Mode 1: single threshold for redundancy
				       removal, 2: lower and upper thresholds
				       for redundancy removal.               */
    float      thresh    = 0.0;     /* Threshold for non-redundancy.         */
    float      threshlow = 0.0;	    /* Threshold (lower limit).              */
    float      threshup  = 0.0;	    /* Threshold (upper limit).              */
    AjPMatrixf matrix    = NULL;    /* Substitution matrix.                  */
    float      gapopen   = 0.0;     /* Gap insertion penalty.                */
    float      gapextend = 0.0;     /* Gap extension penalty.                */
    AjPDirout  out       = NULL;    /* Domain hits files (output).           */
    AjPFile    outf      = NULL;    /* Current DHF file (output).            */
    AjBool     dored     = ajFalse; /* True if redundant hits are output.    */
    AjPDirout  outred    = NULL;    /* DHF files for redundant hits (output).*/
    AjPFile    redf      = NULL;    /* Current DHF file redundancy (output). */
    AjPStr     outname   = NULL;    /* Name of output file (re-used).        */
    AjPFile    logf      = NULL;    /* Log file pointer.                     */
 
    AjBool     ok        = ajFalse; /* Housekeeping.                         */
    AjPSeqset  seqset    = NULL;    /* Seqset (re-used).                     */
    AjPSeqin   seqin     = NULL;    /* Seqin (re-used).                      */
    AjPList    seq_list  = NULL;    /* Main list for redundancy removal.     */
    EmbPDmxNrseq seq_tmp = NULL;    /* Temp. pointer for making seq_list.    */
    ajint      seq_siz   = 0;       /* Size of seq_list.                     */
    AjPUint    keep      = NULL;    /* 1: Sequence in seq_list was classed as
				       non-redundant, 0: redundant.          */
    AjPUint    nokeep    = NULL;    /* Inversion of keep array.              */
    ajint      nseqnr    = 0;       /* No. non-redundant seqs. in seq_list.  */
    

    AjPStr     filtername= NULL;    /* Name of filter file (re-used).        */
    AjPFile    filterf   = NULL;    /* Current filter file.                  */
    EmbPHitlist hitlist   = NULL;   /* Hitlist from input file (re-used).    */
    AjPScopalg scopalg   = NULL;    /* Scopalg from input file.              */
    ajint      x         = 0;       /* Housekeeping.                         */
    

    


    /* Read data from acd. */
    embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION);

    in        = ajAcdGetDirlist("dhfinpath");
    dosing    = ajAcdGetToggle("dosing");
    singlets    = ajAcdGetDirectory("singletsdir");
    dosets    = ajAcdGetToggle("dosets");
    insets    = ajAcdGetDirectory("insetsdir");
    mode      = ajAcdGetListSingle("mode");  
    thresh    = ajAcdGetFloat("thresh");
    threshlow = ajAcdGetFloat("threshlow");
    threshup  = ajAcdGetFloat("threshup");
    matrix    = ajAcdGetMatrixf("matrix");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    out       = ajAcdGetOutdir("dhfoutdir");
    dored     = ajAcdGetToggle("dored");
    outred    = ajAcdGetOutdir("redoutdir");
    logf      = ajAcdGetOutfile("logfile");



    /* Housekeeping. */
    filtername  = ajStrNew();
    outname     = ajStrNew();


    if(!(ajStrToInt(mode, &moden)))
	ajFatal("Could not parse ACD node option");


    
    /* Process each DHF (input) in turn. */
    while(ajListPop(in,(void **)&inname))
    {
	ajFmtPrint("Processing %S\n", inname);
	ajFmtPrintF(logf, "//\n%S\n", inname);


	seq_list    = ajListNew();
	keep        = ajUintNew();  	    
	nokeep      = ajUintNew();  	    	
	
	/**********************************/
	/*         Open DHF file          */
	/**********************************/
	if((inf = ajFileNewInNameS(inname)) == NULL)
	    ajFatal("Could not open DHF file %S", inname);

	/* Read DHF file. */
	ok = ajFalse;
	if(!(infhits = embHitlistReadFasta(inf)))
	{
	    ajWarn("embHitlistReadFasta call failed in seqnr");
	    ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n");
	
	    /* Read sequence set instead. */ 
	    seqset = ajSeqsetNew();
	    seqin  = ajSeqinNew();
	    ajSeqinUsa(&seqin, inname);
	
	    if(!(ajSeqsetRead(seqset, seqin)))
		ajFatal("SeqsetRead failed in seqsearch_psialigned");

	    if(ajSeqsetGetSize(seqset))
		ok = ajTrue;
	}
	else
	    if(infhits->N)
		ok = ajTrue;

	/* Close DHF file. */
	ajFileClose(&inf);
	
	/* Process empty DHF files (should never occur). */
	if(!ok)
	{		
	    ajWarn("Empty input file %S\n", inname);
	    ajFmtPrintF(logf, "Empty input file %S\n", inname);
	    if(infhits)
		embHitlistDel(&infhits);
	    if(seqset)
		ajSeqsetDel(&seqset);
	    if(seqin)
		ajSeqinDel(&seqin);
	    continue;
	}	

	
	/* 1.  Create list of sequences from the main input directory.. */
	if(infhits)
	{
	    for(x=0; x<infhits->N; x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc);
		ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq);
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	} 
	else
	{	 
	    for(x=0;x<ajSeqsetGetSize(seqset);x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x));
		ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x));
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	    ajSeqsetDel(&seqset);
	    ajSeqinDel(&seqin);
	}
	
    

	/**********************************/
	/*   Open singlets filter file    */
	/**********************************/
	if(dosing)
	{
	    /* Open singlets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(singlets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(singlets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DHF file %S",
		       filtername);
		ajFmtPrint("Could not open singlets filter file %S",
			   filtername);
	    }
	    else
	    {
		/* Read DHF file. */
		ok = ajFalse;
		if(!(hitlist = embHitlistReadFasta(filterf)))
		{
		    ajWarn("embHitlistReadFasta call failed in seqnr");
		    ajFmtPrintF(logf, 
				"embHitlistReadFasta call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
	
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(hitlist->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty singlets filter file %S\n", filtername);
		    ajFmtPrintF(logf, "Empty singlets filter file %S\n", 
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files). */
		if(hitlist)
		{
		    for(x=0; x<hitlist->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc);
			ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq);
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    embHitlistDel(&hitlist);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	
	/**********************************/
	/*      Open sets filter file     */
	/**********************************/
	if(dosets)
	{
	    /* Open sets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(insets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(insets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DAF file %S", filtername);
		ajFmtPrint("Could not open sets filter file %S", filtername);
	    }
	    else
	    {
		/* Read DAF file. */
		ok = ajFalse;

		if(!(ajDmxScopalgRead(filterf, &scopalg)))
		{
		    ajWarn("ajDmxScopalgRead call failed in seqnr");
		    ajFmtPrintF(logf,
				"ajDmxScopalgRead call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
		    
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(scopalg->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty sets filter file %S\n",
			   filtername);
		    ajFmtPrintF(logf, "Empty sets filter file %S\n",
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files).. */
		if(scopalg)
		{
		    for(x=0; x<scopalg->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]);
			ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]);
			/* Remove gap char's & whitespace. */
			ajStrRemoveGap(&seq_tmp->Seq->Seq);  
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajDmxScopalgDel(&scopalg);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	/* 4. Identify redundant domains.. */
	if(moden == 1)
	{
	    if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, thresh, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}		
	else
	{
	    if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, threshlow, threshup, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}	
	seq_siz = ajListGetLength(seq_list);
	for(x=0; x<seq_siz; x++)
	    if(ajUintGet(keep, x) == 1)
		ajUintPut(&nokeep, x, 0);
	    else
		ajUintPut(&nokeep, x, 1);
	

	/* Create output files. */
	ajStrAssignS(&outname, inname);
	ajFilenameTrimPathExt(&outname);
	outf = ajFileNewOutNameDirS(outname, out);
	if(dored)
	    redf = ajFileNewOutNameDirS(outname, outred);
	

	/* 5. Write non-redundant domains to main output directory.  
	   6.  If specified, write redundant domains to output directory. */
	embHitlistWriteSubsetFasta(outf, infhits, keep);
	if(dored)
	    embHitlistWriteSubsetFasta(redf, infhits, nokeep);

	embHitlistDel(&infhits);
	ajFileClose(&outf);
	ajFileClose(&redf);
	ajStrDel(&inname);

	while(ajListPop(seq_list, (void **) &seq_tmp))
	{
	    ajSeqDel(&seq_tmp->Seq);
	    AJFREE(seq_tmp);
	}
	ajListFree(&seq_list);
	ajUintDel(&keep);	
	ajUintDel(&nokeep);
    }	    


    /* Tidy up. */
    ajListFree(&in);
    if(singlets)
	ajDirDel(&singlets);
    if(insets)
	ajDirDel(&insets);
    ajDiroutDel(&out);
    if(outred)
	ajDiroutDel(&outred);
    ajFileClose(&logf);

    ajMatrixfDel(&matrix);

    ajStrDel(&filtername);
    ajStrDel(&outname);
    ajStrDel(&mode);


    embExit();
    return 0;
}
Esempio n. 4
0
int main(int argc, char **argv)
{
    AjPSeqout outseq = NULL;
    AjPList list     = NULL;
    AjPSeq seq       = NULL;
    AjPStr insert    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr* seqr     = NULL;
    AjPFile data     = NULL;
    ajint start   = 0;
    ajint length  = 0;
    ajint amount  = 0;
    ajint scmax   = 0;
    ajint extra   = 0;

    embInit("makeprotseq", argc, argv);

    data     = ajAcdGetInfile("pepstatsfile");
    insert   = ajAcdGetString("insert");
    start    = ajAcdGetInt("start");
    length   = ajAcdGetInt("length");
    amount   = ajAcdGetInt("amount");
    outseq   = ajAcdGetSeqoutall("outseq");

    list = ajListstrNew();

    /* this is checked by acd
    if(amount <=0 || length <= 0)
    ajFatal("Amount or length is 0 or less. "
                 "Unable to create any sequences"); */

    /* if insert, make sure sequence is large enough */
    if(ajStrGetLen(insert))
    {
        length -= ajStrGetLen(insert);
        /* start= start <= 1 ? 0 : --start; */ /* checked in acd */
        start--;

        if(length <= 0)
            ajFatal("Sequence smaller than inserted part. "
                    "Unable to create sequences.");
    }

    /* make the list of AjPStr to be used in sequence creation */
    if(data)
    {
        ajDebug("Distribution datafile '%s' given checking type\n",
                ajFileGetPrintnameC(data));
        seqstr = ajStrNew();
        ajReadlineTrim(data,&seqstr);

        if(ajStrFindC(seqstr,"PEPSTATS") == 0)
        {
            makeprotseq_parse_pepstats(&list,data);
        }
        else
        {
            ajWarn("Not pepstats file. Making completely random sequences.");
            makeprotseq_default_chars(&list);
        }

        ajStrDel(&seqstr);
        ajFileClose(&data);
    }
    else
        makeprotseq_default_chars(&list);

    /* if insert, make sure type is correct */
    /* typecheking code is not working, uncomment and test after it is
    if(ajStrGetLen(insert))
    {
    seqstr = ajStrNew();
    if(prot)
        ajStrAssignC(&seqstr,"pureprotein");
    if(!ajSeqTypeCheckS(&insert,seqstr))
        ajFatal("Insert not the same sequence type as sequence itself.");
    ajStrDel(&seqstr);
    } */

    /* array allows fast creation of a sequences */
    scmax = (ajuint) ajListstrToarray(list,&seqr);
    if(!scmax)
        ajFatal("No strings in list. No characters to make the sequence.");

    ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n",
            scmax,extra,seqr[0]);

    ajRandomSeed();

    while(amount-- > 0)
    {
        seqstr = makeprotseq_random_sequence(seqr,scmax,length);

        if(ajStrGetLen(insert))
            ajStrInsertS(&seqstr,start,insert);

        ajStrFmtLower(&seqstr);
        seq = ajSeqNew();

        ajSeqAssignSeqS(seq, seqstr);
        ajSeqSetProt(seq);

        ajSeqoutWriteSeq(outseq, seq);
        ajSeqDel(&seq);
        ajStrDel(&seqstr);
    }

    ajSeqoutClose(outseq);
    ajSeqoutDel(&outseq);
    ajListstrFreeData(&list);
    ajStrDel(&insert);
    AJFREE(seqr);

    embExit();

    return 0;
}
Esempio n. 5
0
static void domainalign_ProcessStampFile(AjPStr in, 
					 AjPStr out,
					 AjPDomain domain, 
					 ajint noden, 
					 AjPFile logf)
{
    /*
    ** Converts the STAMP output file named 'in' into the annotated
    ** alignment file named 'out'.
    **
    ** The input is skipped up to and including the first line whose
    ** character at position 1 is '\0' (presumably a blank line after
    ** trimming).  If no such line exists a warning is issued, a note is
    ** written to 'logf' and no output file is created.  Otherwise the
    ** SCOP or CATH classification records of 'domain' are written,
    ** followed by the identifier of the node selected by 'noden'
    ** (1-4 for SCOP, 5-9 for CATH; anything else is fatal), and then the
    ** reformatted alignment blocks.
    */
    AjPFile ofile   = NULL;   /* Output file                              */
    AjPFile ifile   = NULL;   /* Input file                               */
    AjPStr  word    = NULL;   /* First whitespace-delimited word of line  */
    AjPStr  idcode  = NULL;   /* First 7 characters of 'word'             */
    AjPStr  seqtxt  = NULL;   /* Columns 13-69 of the line                */
    AjPStr  rdline  = NULL;   /* Current input line                       */
    ajint   section = 1;      /* 1: numbering + sequences,
				 2: secondary structure,
				 3: Very/Less/Post similar records        */
    AjBool  found   = ajFalse;


    rdline = ajStrNew();
    word   = ajStrNew();
    idcode = ajStrNew();
    seqtxt = ajStrNew();


    if(!(ifile=ajFileNewInNameS(in)))
	ajFatal("Could not open input file in domainalign_ProcessStampFile");


    /* Skip the file preamble */
    while(!found && ajReadlineTrim(ifile,&rdline))
	if(ajStrGetCharPos(rdline, 1)=='\0')
	    found = ajTrue;


    if(!found)
    {
	ajWarn("\n***********************************************\n"
	       "* STAMP was called but output file was EMPTY! *\n"
	       "*   NO OUTPUT FILE GENERATED FOR THIS NODE.   *\n"
	       "***********************************************\n");
	ajFmtPrintF(logf, "STAMP called but output file empty.  "
		    "No output file for this node!");
    }
    else
    {
	if(!(ofile=ajFileNewOutNameS(out)))
	 ajFatal("Could not open output file in domainalign_ProcessStampFile");


	if(domain->Type == ajSCOP)
	{
	    /* SCOP classification records */
	    ajFmtPrintF(ofile,"# TY   SCOP\n# XX\n");
	    ajFmtPrintF(ofile,"# CL   %S",domain->Scop->Class);
	    ajFmtPrintSplit(ofile,domain->Scop->Fold,"\n# XX\n# FO   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(ofile,domain->Scop->Superfamily,"# XX\n# SF   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(ofile,domain->Scop->Family,"# XX\n# FA   ",
			    75," \t\n\r");
	    ajFmtPrintF(ofile,"# XX\n");

	    /* Identifier of the requested SCOP node */
	    switch(noden)
	    {
	    case 1:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Class);
		break;
	    case 2:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Fold);
		break;
	    case 3:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Superfamily);
		break;
	    case 4:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Family);
		break;
	    default:
		ajFatal("Node number error in domainalign_ProcessStampFile");
		break;
	    }
	}
	else
	{
	    /* CATH classification records */
	    ajFmtPrintF(ofile,"# TY   CATH\n# XX\n");
	    ajFmtPrintF(ofile,"# CL   %S",domain->Cath->Class);
	    ajFmtPrintSplit(ofile,domain->Cath->Architecture,
			    "\n# XX\n# AR   ",75," \t\n\r");
	    ajFmtPrintSplit(ofile,domain->Cath->Topology,"# XX\n# TP   ",75,
			    " \t\n\r");
	    ajFmtPrintSplit(ofile,domain->Cath->Superfamily,"# XX\n# SF   ",
			    75," \t\n\r");
	    ajFmtPrintF(ofile,"# XX\n");

	    /* Identifier of the requested CATH node */
	    switch(noden)
	    {
	    case 5:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Cath->Class_Id);
		break;
	    case 6:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Cath->Arch_Id);
		break;
	    case 7:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Cath->Topology_Id);
		break;
	    case 8:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Cath->Superfamily_Id);
		break;
	    case 9:
		ajFmtPrintF(ofile,"# SI   %d\n# XX",
			    domain->Cath->Family_Id);
		break;
	    default:
		ajFatal("Node number error in domainalign_ProcessStampFile");
		break;
	    }
	}


	/* Remainder of the file: sections cycle 1 -> 2 -> 3 -> 1, with
	   a separator line (same test as the preamble) between them */
	while(ajReadlineTrim(ifile,&rdline))
	{
	    if(ajStrGetCharPos(rdline, 1)=='\0')
	    {
		section = (section == 3) ? 1 : section + 1;
		continue;
	    }

	    /* Secondary structure filled with '????' (unwanted). */
	    if(section == 2)
		continue;

	    if(section == 1)
	    {
		/* The numbering line is copied through unchanged */
		if(ajStrPrefixC(rdline,"Number"))
		{
		    ajFmtPrintF(ofile,"\n# %7s %S\n"," ", rdline);
		    continue;
		}

		/* Domain identifier: first 7 characters of first word */
		ajFmtScanS(rdline, "%S", &word);
		ajStrAssignSubS(&idcode, word, 0, 6);

		/* Sequence: spaces become 'X', then upper-cased */
		ajStrAssignSubS(&seqtxt, rdline, 13, 69);
		ajStrExchangeSetCC(&seqtxt, " ", "X");
		ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
			    "with 'X'\n");
		ajStrFmtUpper(&seqtxt);

		ajFmtPrintF(ofile,"%-15S%7d %S%7d\n",
			    idcode, 0, seqtxt, 0);
		continue;
	    }

	    /* Section 3: only the "Post" similarity line is kept; Very
	       and Less similar lines are dropped */
	    if(ajStrPrefixC(rdline,"Post"))
	    {
		ajStrAssignSubS(&seqtxt, rdline, 13, 69);
		ajFmtPrintF(ofile,"%-15s%7s %S\n","# Post_similar", " ",
			    seqtxt);
	    }
	}
    }


    /* Clean up and close input and output files. */
    ajFileClose(&ofile);
    ajFileClose(&ifile);
    ajStrDel(&rdline);
    ajStrDel(&word);
    ajStrDel(&idcode);
    ajStrDel(&seqtxt);


    return;
}
Esempio n. 6
0
/* Function: include_alignment()
 *
 * Purpose:  Read the multiple alignment in <seqfile>, align it to the
 *           model <hmm>, and append the resulting per-sequence traces,
 *           raw sequences, digitized sequences and SQINFO records to
 *           the caller's existing arrays.
 *
 *           The master trace comes from the HMM's alignment map when
 *           <do_mapped> is TRUE (after checking that the alignment's
 *           GCG checksum equals hmm->checksum), and from
 *           P7ViterbiAlignAlignment() otherwise.
 *
 *           Any failure to open, identify or read the file, or a
 *           checksum mismatch, is reported through ajFatal().
 *
 * Args:     seqfile  - name of alignment file
 *           hmm      - model to align to
 *           do_mapped- TRUE if we're to use the HMM's alignment map
 *           rsq      - RETURN: array of rseqs to add to
 *           dsq      - RETURN: array of dsq to add to
 *           sqinfo   - RETURN: array of SQINFO to add to
 *           tr       - RETURN: array of traces to add to
 *           nseq     - RETURN: number of seqs           
 *
 * Returns:  new, realloc'ed arrays for rsq, dsq, sqinfo, tr; nseq is
 *           increased to nseq+ainfo.nseq.
 */
void
include_alignment(char *seqfile, struct plan7_s *hmm, int do_mapped,
		  char ***rsq, char ***dsq, SQINFO **sqinfo, 
		  struct p7trace_s ***tr, int *nseq)
{
  int    fmt;			/* format of alignment file        */
  char **aln;			/* aligned sequences               */
  char **dsq_new;		/* digitized copies of aln         */
  char **rsq_new;		/* dealigned copies of aln         */
  AINFO  ainfo;			/* info that goes with aln         */
  int    i;			/* counter over new sequences      */
  int    base;			/* number of sequences on entry    */
  int    total;			/* number of sequences on return   */
  struct p7trace_s  *mtr;	/* master trace                    */
  struct p7trace_s **newtr;	/* individual traces for aln       */

				/* identify the file format */
  if (! SeqfileFormat(seqfile, &fmt, NULL))
    {
      if (squid_errno == SQERR_NOFILE)
	ajFatal("Alignment file %s could not be opened for reading", seqfile);
      ajFatal("Failed to determine format of alignment file %s", seqfile);
    }

				/* read the alignment, force upper case */
  if (! ReadAlignment(seqfile, fmt, &aln, &ainfo))
    ajFatal("Failed to read aligned sequence file %s", seqfile);
  for (i = 0; i < ainfo.nseq; i++)
    s2upper(aln[i]);

				/* the map is only valid for the alignment
				   the HMM was built from */
  if (do_mapped && GCGMultchecksum(aln, ainfo.nseq) != hmm->checksum)
    ajFatal("The checksums for alignment file %s and the HMM alignment map don't match.", 
	seqfile);

				/* master trace, by map or by alignment */
  if (do_mapped)
    mtr = MasterTraceFromMap(hmm->map, hmm->M, ainfo.alen);
  else
    mtr = P7ViterbiAlignAlignment(aln, &ainfo, hmm);

  base  = *nseq;
  total = base + ainfo.nseq;

				/* one trace per sequence, appended to the
				   caller's traces */
  ImposeMasterTrace(aln, ainfo.nseq, mtr, &newtr);
  *tr = MergeTraceArrays(*tr, base, newtr, ainfo.nseq);

				/* raw sequences: the strings move into
				   *rsq, only the temporary array is freed */
  *rsq = ReallocOrDie((*rsq), sizeof(char *) * total);
  DealignAseqs(aln, ainfo.nseq, &rsq_new);
  for (i = 0; i < ainfo.nseq; i++)
    (*rsq)[base + i] = rsq_new[i];
  free(rsq_new);

				/* digitized sequences, same scheme */
  *dsq = ReallocOrDie((*dsq), sizeof(char *) * total);
  DigitizeAlignment(aln, &ainfo, &dsq_new);
  for (i = 0; i < ainfo.nseq; i++)
    (*dsq)[base + i] = dsq_new[i];
  free(dsq_new);

				/* SQINFO records are copied one by one */
  *sqinfo = ReallocOrDie((*sqinfo), sizeof(SQINFO) * total);
  for (i = 0; i < ainfo.nseq; i++)
    SeqinfoCopy(&((*sqinfo)[base + i]), &(ainfo.sqinfo[i]));

  *nseq = total;

				/* cleanup */
  P7FreeTrace(mtr);
  FreeAlignment(aln, &ainfo);
  return;
}
Esempio n. 7
0
/* @funcstatic newcoils_read_matrix *******************************************
**
** Reads the matrix and stores in a hept_pref structure
**
** Lines starting with '%' are ignored.  Lines starting with "uw " or "w "
** define a window (unweighted / weighted) with five parameters; at most
** eight such lines are accepted, a ninth is fatal.  Lines starting with an
** upper-case letter give the NCHEPTAD heptad preferences for that letter;
** only the first line for a letter is used and non-positive values are
** stored as -1.  Any other line produces a warning and is skipped.
**
** Values that cannot be parsed from a line are taken as zero rather than
** being left undefined.
**
** @param [u] inf [AjPFile] matrix input file
** @return [struct hept_pref*] Matrix data for heptad preference
**                             (allocated here; caller frees)
******************************************************************************/
static struct hept_pref* newcoils_read_matrix(AjPFile inf)
{
    ajint i;
    ajint j;
    ajint pt;
    ajint aa_len;
    ajint win;

    float m_g;
    float sd_g;
    float m_cc;
    float sd_cc;
    float sc;
    float hept[NCHEPTAD];

    AjPStr buff;
    const char   *pbuff;
    
    struct hept_pref *h;


    buff = ajStrNew();

    aa_len = (ajint) strlen(NCAAs);

    AJNEW(h);
    AJCNEW(h->m,aa_len);

    /* -1 marks "no value read yet" for every residue/position */
    for(i=0; i<aa_len; ++i)
    {
	AJCNEW(h->m[i],NCHEPTAD);
	for(j=0; j<NCHEPTAD; ++j)
	    h->m[i][j] = -1;
    }

    AJNEW(h->f);

    h->n = 0;
    h->smallest = 1.0;

    while(ajReadlineTrim(inf,&buff))
    {
	pbuff = ajStrGetPtr(buff);

	/* Comment line */
	if(*pbuff == '%')
	    continue;

	if((strncmp(pbuff,"uw ",3)==0) || (strncmp(pbuff,"w ",2)==0))
	{
	    /* Window definition */
	    i = h->n;
	    if(strncmp(pbuff,"uw ",3)==0)
		h->f[i].w = 0;
	    else
		h->f[i].w = 1;

	    /* Defined defaults in case the line is short or malformed */
	    win   = 0;
	    m_cc  = 0.0F;
	    sd_cc = 0.0F;
	    m_g   = 0.0F;
	    sd_g  = 0.0F;
	    sc    = 0.0F;

	    ajFmtScanS(buff,"%*s %d %f %f %f %f %f",&win,&m_cc,
		       &sd_cc,&m_g,&sd_g,&sc);

	    h->f[i].win   = win;
	    h->f[i].m_cc  = m_cc; 
	    h->f[i].sd_cc = sd_cc;
	    h->f[i].m_g   = m_g;
	    h->f[i].sd_g  = sd_g;
	    h->f[i].sc    = sc;
	    h->n++;

	    /* Always keep one spare slot for the next window line */
	    AJCRESIZE(h->f,(h->n)+1);
		
	    if((h->n)>=9)
		ajFatal("Too many window parms in matrix file\n");
	}
	else if(*pbuff>='A' && *pbuff<='Z')
	{
	    /* AA data */
	    pt = (int)(pbuff[0]-'A');

	    /* h->m only has aa_len rows */
	    if(pt >= aa_len)
	    {
		ajWarn("AA %c out of range in matrix file\n", *pbuff);
		continue;
	    }

	    if(h->m[pt][0]==-1)
	    {
		/* Defined defaults in case the line is short or malformed;
		   zero is turned into -1 below */
		for(i=0; i<NCHEPTAD; ++i)
		    hept[i] = 0.0F;

		ajFmtScanS(buff,"%*s%f%f%f%f%f%f%f",&hept[0],
			   &hept[1],&hept[2],&hept[3],&hept[4],
			   &hept[5],&hept[6]);

		for(i=0; i<NCHEPTAD; ++i)
		{
		    h->m[pt][i] = hept[i];
		    if(h->m[pt][i]>0)
		    {
			if(h->m[pt][i]<h->smallest)
			    h->smallest = h->m[pt][i];
		    }
		    else
			h->m[pt][i]=-1; /* Don't permit zero values */
		}
	    }
	    else
		ajWarn("multiple entries for AA %c in matrix file\n",
		       *pbuff);
	}
	else
	{
	    ajWarn("strange characters in matrix file\n");
	    ajWarn("Ignoring line: %S\n",buff);
	}
    }


    ajStrDel(&buff);

    return h;
}
Esempio n. 8
0
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** Writes the SCOP or CATH classification records of the domain, followed
** by the identifier record for the requested node, to the output file.
** The input is then skipped up to the first line that has no character at
** index 1; after that, each remaining line that is not blank-like, does not
** start with "CLUSTAL" and does not start with a space is rewritten as
** the first 7 characters of its first token followed by its second token.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment (1-4 for SCOP,
**                          5-9 for CATH)
** @param [r] logf [AjPFile] Log file (not used by this function)
**
** @return [void]
** @@
****************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in, 
					   AjPStr align, 
					   AjPDomain domain,
					   ajint noden, 
					   AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer. */
    AjPFile   inf = NULL;  /* Input file pointer. */
    AjPStr  temp1 = NULL;  /* First token of an alignment line. */
    AjPStr  temp2 = NULL;  /* Leading 7 characters of temp1. */
    AjPStr  temp3 = NULL;  /* Second token of an alignment line. */
    AjPStr   line = NULL;  /* Line of text from input file. */

    (void) logf;

    /* Initialise strings. */
    line    = ajStrNew();
    temp1   = ajStrNew();
    temp2   = ajStrNew();
    temp3   = ajStrNew();


    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");
    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");


    /* Write DOMAIN classification records, then the node identifier. */
    if(domain->Type == ajSCOP)
    {
        ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
        ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
        ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");

        if(noden==1)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Class);
        else if(noden==2)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Fold);
        else if(noden==3)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Superfamily);
        else if(noden==4)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Scop->Sunid_Family);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }
    else
    {
        ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
        ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
        ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");

        if(noden==5)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Class_Id);
        else if(noden==6)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Arch_Id);
        else if(noden==7)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Topology_Id);
        else if(noden==8)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Superfamily_Id);
        else if(noden==9)
            ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Family_Id);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }


    /* Start of code for reading input file. */
    /* Skip everything up to and including the first line that has no
       character at index 1 (ajReadlineTrim has removed the newline). */
    while(ajReadlineTrim(inf,&line))
        if(ajStrGetCharPos(line, 1)=='\0')
            break;


    /* Read rest of input file. */
    while(ajReadlineTrim(inf,&line))
    {
        /* Blank-like lines and the CLUSTAL banner carry no data. */
        if(ajStrGetCharPos(line, 1)=='\0')
            continue;

        if(ajStrPrefixC(line,"CLUSTAL"))
            continue;

        /* A line starting with a space becomes an empty output line. */
        if(ajStrPrefixC(line," "))
        {
            ajFmtPrintF(outf,"\n");
            continue;
        }

        /* Identifier and sequence tokens; skip a line that lacks either
           so stale values from the previous line are never re-emitted. */
        if(ajFmtScanS(line, "%S %S", &temp1,&temp3) < 2)
            continue;

        /* Keep only the 7 characters of the domain identifier code. */
        ajStrAssignSubS(&temp2, temp1, 0, 6);

        /* Write domain id code and sequence out. */
        ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);
    }


    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);


    /* All done. */
    return;
}
Esempio n. 9
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq   = NULL;
    AjPFile inf  = NULL;

    AjPStr strand = NULL;
    AjPStr substr = NULL;
    AjPStr name   = NULL;
    AjPStr mname  = NULL;
    AjPStr tname  = NULL;
    AjPStr pname  = NULL;
    AjPStr line   = NULL;
    AjPStr cons   = NULL;
    AjPStr m      = NULL;
    AjPStr n      = NULL;

    AjPAlign align= NULL; /* JISON, replaces AjPOutfile outf */
    
    ajint type;
    ajint begin;
    ajint end;
    ajulong len;
    ajint i;
    ajint j;

    float **fmatrix=NULL;

    ajint mlen;
    float maxfs;
    ajint thresh;

    float gapopen;
    float gapextend;
    float opencoeff;
    float extendcoeff;

    const char *p;

    ajulong maxarr = 1000;
    ajulong alen;
    float *path;
    ajint *compass;
    size_t stlen;

    embInit("prophet", argc, argv);

    seqall      = ajAcdGetSeqall("sequence");
    inf         = ajAcdGetInfile("infile");
    opencoeff   = ajAcdGetFloat("gapopen");
    extendcoeff = ajAcdGetFloat("gapextend");
    align       = ajAcdGetAlign("outfile");  /*JISON replacing outfile */

    opencoeff   = ajRoundFloat(opencoeff, 8);
    extendcoeff = ajRoundFloat(extendcoeff, 8);

    substr = ajStrNew();
    name   = ajStrNew();
    mname  = ajStrNew();
    tname  = ajStrNew();
    line   = ajStrNew();
    m      = ajStrNewC("");
    n      = ajStrNewC("");

    type = prophet_getType(inf,&tname);
    if(!type)
      ajFatal("Unrecognised profile/matrix file format");

    prophet_read_profile(inf,&pname,&mname,&mlen,&gapopen,&gapextend,&thresh,
			 &maxfs, &cons);
    ajAlignSetMatrixName(align, mname);
    AJCNEW(fmatrix, mlen);

    for(i=0;i<mlen;++i)
    {
	AJCNEW(fmatrix[i], AZ);
	if(!ajReadlineTrim(inf,&line))
	    ajFatal("Missing matrix line");
	p = ajStrGetPtr(line);
	p = ajSysFuncStrtok(p," \t");
	for(j=0;j<AZ;++j)
	{
	    sscanf(p,"%f",&fmatrix[i][j]);
	    p = ajSysFuncStrtok(NULL," \t");
	}
    }

    AJCNEW(path, maxarr);
    AJCNEW(compass, maxarr);

    while(ajSeqallNext(seqall, &seq))
    {
	begin = ajSeqallGetseqBegin(seqall);
	end   = ajSeqallGetseqEnd(seqall);

	ajStrAssignC(&name,ajSeqGetNameC(seq));
	strand = ajSeqGetSeqCopyS(seq);

	ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
	len = ajStrGetLen(substr);

	if(len > (ULONG_MAX/(ajulong)(mlen+1)))
	    ajFatal("Sequences too big. Try 'supermatcher'");

	alen = len*mlen;
	if(alen>maxarr)
	{
	    stlen = (size_t) alen;
	    AJCRESIZE(path,stlen);
	    AJCRESIZE(compass,stlen);
	    maxarr=alen;
	}

	ajStrAssignC(&m,"");
	ajStrAssignC(&n,"");

	/* JISON used to be
	prophet_scan_profile(substr,pname,name,mlen,fmatrix,
			     outf,cons,opencoeff,
			     extendcoeff,path,compass,&m,&n,len); */

	/* JISON new call and reset align */
	prophet_scan_profile(substr,name,pname,mlen,fmatrix,
			     align,cons,opencoeff,
			     extendcoeff,path,compass,&m,&n,(ajint)len); 
	ajAlignReset(align);
	
	ajStrDel(&strand);
    }

    for(i=0;i<mlen;++i)
	AJFREE (fmatrix[i]);
    AJFREE (fmatrix);

    AJFREE(path);
    AJFREE(compass);

    ajStrDel(&line);
    ajStrDel(&cons);
    ajStrDel(&name);
    ajStrDel(&pname);
    ajStrDel(&mname);
    ajStrDel(&tname);
    ajStrDel(&substr);
    ajStrDel(&m);
    ajStrDel(&n);
    ajSeqDel(&seq);
    ajFileClose(&inf);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&seqall);
    embExit();

    return 0;
}
Esempio n. 10
0
/*
** eiprscan main program.
**
** Writes the input sequence set to a temporary FASTA file, assembles an
** "iprscan -cli" command line from the ACD options, runs it through
** system() and removes the temporary file afterwards.
*/
int main(int argc, char **argv)
{
    /* ACD inputs */
    AjPSeqset seqset   = NULL;
    AjPStr    email    = NULL;
    AjPStr    fmt      = NULL;
    AjPStr    trtab    = NULL;
    AjPStr    *applist = NULL;
    AjPFile   outf     = NULL;
    AjBool    crc      = ajFalse;
    AjBool    alt      = ajFalse;
    AjBool    iprlook  = ajFalse;
    AjBool    goterms  = ajFalse;
    ajint     trlen    = 0;

    /* Working data */
    AjPSeqout    seqout = NULL;   /* writer for the temporary FASTA file */
    AjPStr       cl     = NULL;   /* command line being assembled        */
    AjPStr       stmp   = NULL;   /* scratch for one formatted option    */
    AjPStr       fn     = NULL;   /* temporary file name                 */
    const AjPStr ofn    = NULL;   /* name of the ACD output file         */
    ajuint       i      = 0;
    ajuint       lcnt   = 0;      /* number of entries in applist        */


    embInitPV("eiprscan", argc, argv, "IPRSCAN", VERSION);

    seqset  = ajAcdGetSeqset("sequence");
    email   = ajAcdGetString("email");
    crc     = ajAcdGetBoolean("crc");
    applist = ajAcdGetList("appl");
    fmt     = ajAcdGetListSingle("format");
    trtab   = ajAcdGetListSingle("trtable");
    trlen   = ajAcdGetInt("trlen");
    alt     = ajAcdGetBoolean("altjobs");
    iprlook = ajAcdGetBoolean("iprlookup");
    goterms = ajAcdGetBoolean("goterms");
    outf    = ajAcdGetOutfile("outfile");

    stmp = ajStrNew();
    cl   = ajStrNewC("iprscan -cli");
    fn   = ajStrNew();

    /* Dump the sequences to a temporary FASTA file */
    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
        ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);

    /* Input file and sequence-related switches */
    ajFmtPrintS(&stmp," -i %S",fn);
    ajStrAppendS(&cl,stmp);

    if(!ajSeqsetIsProt(seqset))
        ajStrAppendC(&cl," -seqtype n");

    if(!crc)
        ajStrAppendC(&cl," -nocrc");

    if(ajStrGetLen(email))
    {
        ajFmtPrintS(&stmp," -email %S",email);
        ajStrAppendS(&cl,stmp);
    }

    /* Count the selected applications (the list is NULL terminated) */
    for(lcnt = 0; applist[lcnt]; ++lcnt)
        ;

    /* "all" adds no -appl switch and may only appear on its own */
    for(i = 0; applist[i]; ++i)
    {
        if(ajStrMatchC(applist[i],"all"))
        {
            if(lcnt != 1)
                ajFatal("Cannot specify 'all' with multiple "
                        "applications");
            continue;
        }

        ajFmtPrintS(&stmp," -appl %S",applist[i]);
        ajStrAppendS(&cl,stmp);
    }

    /* Remaining valued options */
    ajFmtPrintS(&stmp," -format %S",fmt);
    ajStrAppendS(&cl,stmp);

    ajFmtPrintS(&stmp," -trtable %S",trtab);
    ajStrAppendS(&cl,stmp);

    ajFmtPrintS(&stmp," -trlen %d",trlen);
    ajStrAppendS(&cl,stmp);

    /* Remaining flags */
    if(alt)
        ajStrAppendC(&cl," -altjobs");

    if(iprlook)
        ajStrAppendC(&cl," -iprlookup");

    if(goterms)
        ajStrAppendC(&cl," -goterms");

    /* Only the name of the ACD output file is needed; the file itself is
       closed before the external program runs */
    ofn = ajFileGetNameS(outf);
    ajFmtPrintS(&stmp," -o %S",ofn);
    ajFileClose(&outf);
    ajStrAppendS(&cl,stmp);

#if 0
   ajFmtPrint("%S\n",cl);
#endif

    system(ajStrGetPtr(cl));

    ajSysFileUnlink(fn);

    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajStrDel(&email);
    ajStrDel(&fmt);
    ajStrDel(&trtab);
    ajStrDelarray(&applist);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);

    embExit();

    return 0;
}
Esempio n. 11
0
/*
** Read the next line of a profile file into *line and check that it starts
** with the given keyword.  Terminates through ajFatal on end of file or on
** a keyword mismatch.  Returns a pointer to the text of the line.
*/
static const char* prophet_profile_line(AjPFile inf, AjPStr *line,
                                        const char *key)
{
    const char *p;

    if(!ajReadlineTrim(inf,line))
        ajFatal("Premature EOF in profile file");

    p = ajStrGetPtr(*line);

    if(strncmp(p,key,strlen(key)))
        ajFatal("Incorrect profile/matrix file format");

    return p;
}




/*
** Read the header of a profile file.
**
** The header is eight consecutive lines, each starting with a fixed
** keyword and followed by a value, in this order: Name, Matrix, Length,
** Max_score, Threshold, Gap_open, Gap_extend, Consensus.  Any missing line,
** unexpected keyword or unreadable numeric value is fatal.
**
** inf        profile input file
** name       receives the second token of the Name line
** mname      receives the second token of the Matrix line
** mlen       receives the integer on the Length line
** gapopen    receives the number on the Gap_open line
** gapextend  receives the number on the Gap_extend line
** thresh     receives the integer on the Threshold line
** maxs       receives the number on the Max_score line
** cons       receives the second token of the Consensus line
*/
static void prophet_read_profile(AjPFile inf, AjPStr *name, AjPStr *mname,
				 ajint *mlen, float *gapopen,
				 float *gapextend, ajint *thresh,
				 float *maxs, AjPStr *cons)
{
    const char *p;
    AjPStr line = NULL;

    line = ajStrNew();

    p = prophet_profile_line(inf,&line,"Name");
    p = ajSysFuncStrtok(p," \t");
    p = ajSysFuncStrtok(NULL," \t");
    ajStrAssignC(name,p);

    p = prophet_profile_line(inf,&line,"Matrix");
    p = ajSysFuncStrtok(p," \t");
    p = ajSysFuncStrtok(NULL," \t");
    ajStrAssignC(mname,p);

    /* Numeric fields: an unreadable value would leave the caller's
       variable uninitialised, so treat it as a format error */
    p = prophet_profile_line(inf,&line,"Length");
    if(sscanf(p,"%*s%d",mlen) != 1)
        ajFatal("Incorrect profile/matrix file format");

    p = prophet_profile_line(inf,&line,"Max_score");
    if(sscanf(p,"%*s%f",maxs) != 1)
        ajFatal("Incorrect profile/matrix file format");

    p = prophet_profile_line(inf,&line,"Threshold");
    if(sscanf(p,"%*s%d",thresh) != 1)
        ajFatal("Incorrect profile/matrix file format");

    p = prophet_profile_line(inf,&line,"Gap_open");
    if(sscanf(p,"%*s%f",gapopen) != 1)
        ajFatal("Incorrect profile/matrix file format");

    p = prophet_profile_line(inf,&line,"Gap_extend");
    if(sscanf(p,"%*s%f",gapextend) != 1)
        ajFatal("Incorrect profile/matrix file format");

    p = prophet_profile_line(inf,&line,"Consensus");
    p = ajSysFuncStrtok(p," \t\n");
    p = ajSysFuncStrtok(NULL," \t\n");
    ajStrAssignC(cons,p);

    ajStrDel(&line);

    return;
}
Esempio n. 12
0
/*
** eyinoyang main program.
**
** Writes the input sequence set to a temporary FASTA file, builds the
** yinOyang command line from the ACD options, runs it with its output
** directed to the named output file, and removes the temporary file.
*/
int main(int argc, char **argv)
{
    /* ACD inputs */
    AjPSeqset seqset   = NULL;
    AjPStr    outfname = NULL;
    AjPStr    format   = NULL;
    AjBool    plot     = ajFalse;
    AjBool    netphos  = ajFalse;
    float     thresh   = 0.;

    /* Working data */
    AjPSeqout seqout = NULL;   /* writer for the temporary FASTA file */
    AjPStr    cl     = NULL;   /* command line being assembled        */
    AjPStr    fn     = NULL;   /* temporary file name                 */
    AjPStr    stmp   = NULL;   /* scratch for one formatted option    */


    embInitPV("eyinoyang", argc, argv, "CBSTOOLS",VERSION);

    seqset  = ajAcdGetSeqset("sequence");
    outfname= ajAcdGetOutfileName("outfile");
    plot    = ajAcdGetBoolean("plot");
    netphos = ajAcdGetBoolean("netphos");
    thresh  = ajAcdGetFloat("threshold");
    format  = ajAcdGetListSingle("format");

    /* The command line starts with the configured program path */
    cl   = ajStrNewS(ajAcdGetpathC("yinOyang"));
    fn   = ajStrNew();
    stmp = ajStrNew();

    /* Dump the sequences to a temporary FASTA file */
    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
        ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);

    /* Output format: any value other than short/long adds no switch */
    if(ajStrMatchC(format,"short"))
        ajStrAppendC(&cl," -f s");
    else if(ajStrMatchC(format,"long"))
        ajStrAppendC(&cl," -f l");

    if(plot)
        ajStrAppendC(&cl," -g");

    /* The threshold is only passed along together with -y */
    if(netphos)
    {
        ajStrAppendC(&cl," -y");
        ajFmtPrintS(&stmp," -t %f",thresh);
        ajStrAppendS(&cl,stmp);
    }

    /* The temporary sequence file is the final argument */
    ajFmtPrintS(&stmp," %S",fn);
    ajStrAppendS(&cl,stmp);

#if 0
    ajFmtPrint("%`S\n",cl);
#endif

#if 1
    ajSysExecOutnameAppendS(cl, outfname);
#endif

    ajSysFileUnlinkS(fn);

    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajStrDel(&format);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);
    ajStrDel(&outfname);

    embExit();

    return 0;
}
Esempio n. 13
0
/* @header ********************************************************************
**
** Stand-in program body: it only reports, through ajFatal, that no PLplot
** was found at compile time and that no graph can therefore be produced.
**
******************************************************************************/
int main(int argc, char **argv)
{
    /* Command-line arguments are accepted but not examined */
    ajFatal("Sorry no PLplot was found on compilation hence NO graph\n");

    return 0;
}
Esempio n. 14
0
/*
** acdrelations main program.
**
** Loads the EDAM relations data and the known-types data, then passes every
** ACD file from the input directory list, together with a same-named file
** opened in the output directory, to acdrelations_procacdfile.
*/
int main(ajint argc, char **argv)
{
  /* Input data files */
  AjPFile   inf_edam   = NULL;  /* EDAM data (input) file           */
  AjPFile   inf_ktype  = NULL;  /* knowntypes.standard file         */

  /* ACD files being processed */
  AjPList   acdinlist  = NULL;  /* List of ACD file names (input)   */
  AjPDirout acdoutdir  = NULL;  /* Directory for ACD files (output) */
  AjPStr    acdname    = NULL;  /* Name of current acd file         */
  AjPFile   acdinf     = NULL;  /* Current ACD (input) file         */
  AjPFile   acdoutf    = NULL;  /* Current ACD (output) file        */

  /* Parsed data */
  PEdam   edam         = NULL;  /* EDAM relations data              */
  PKtype  ktype        = NULL;  /* Data from knowntype.standard     */


  embInitP("acdrelations",argc,argv,"MYEMBOSS");

  /* ACD data handling */
  inf_edam   = ajAcdGetDatafile("infileedam");
  inf_ktype  = ajAcdGetInfile("infiletype");
  acdinlist  = ajAcdGetDirlist("indir");
  acdoutdir  = ajAcdGetOutdir("outdir");

  /* Create the data objects and fill them from the two data files */
  edam  = ajEdamNew();
  ktype = ajKtypeNew();

  acdrelations_readdatfile(inf_edam, &edam);
  acdrelations_readtypefile(inf_ktype, &ktype);

  /* Main application loop: one pass per ACD file name popped off the list */
  while(ajListPop(acdinlist,(void **)&acdname))
  {
      acdinf = ajFileNewInNameS(acdname);
      if(!acdinf)
          ajFatal("Cannot open input ACD file %S\n", acdname);

      /* The output file takes the input file's name with its path trimmed */
      ajFilenameTrimPath(&acdname);

      acdoutf = ajFileNewOutNameDirS(acdname, acdoutdir);
      if(!acdoutf)
          ajFatal("Cannot open output ACD file %S\n", acdname);

      acdrelations_procacdfile(acdinf, acdoutf, edam, ktype);

      ajFileClose(&acdinf);
      ajFileClose(&acdoutf);
  }

  /* Clean up and exit */
  ajFileClose(&inf_edam);
  ajFileClose(&inf_ktype);
  ajListFree(&acdinlist);
  ajDiroutDel(&acdoutdir);

  ajEdamDel(&edam);

  ajExit();
  return 0;
}
Esempio n. 15
0
/*
** kclustalw main program.
**
** Collects the ClustalW options from ACD, writes the input sequences to a
** temporary FASTA file, reads that file back into one string and submits it
** through the SOAP call runClustalw.  The job is polled with checkStatus
** every three seconds until it reports a non-zero status, after which the
** "out" result is printed to stdout, the "aln" result is written to the
** output file and the "dnd" result to the dnd output file.
*/
int main(int argc, char **argv) {

  embInitPV("kclustalw", argc, argv, "KBWS", "1.0.8");

  struct soap soap;
  struct ns1__clustalwInputParams params;
  char* jobid  = NULL;
  char* result = NULL;

  AjPSeqall  seqall;
  AjPSeq     seq = NULL;      /* must start NULL for ajSeqallNext */
  AjPFile    outf;
  AjPFile    outf_dnd;
  AjPStr     substr = NULL;
  AjPStr     inseq = NULL;
  AjPStr     alignment;
  AjPStr     output;
  AjPStr     matrix;
  AjPStr     outorder;
  ajint      ktup;
  ajint      window;
  ajint      gapopen;
  float      gapext;
  ajint      gapdist;
  AjBool     endgaps;
  ajint      pairgap;
  ajint      topdiags;
  AjPStr     score;
  AjBool     tossgaps;
  AjBool     kimura;
  AjPStr     outputtree;
  AjBool     tree;
  AjBool     quicktree;
  AjBool     align;
  AjPStr     clustering;
  ajint      numiter;
  AjPStr     iteration;

  alignment  = ajAcdGetString("alignment");
  output     = ajAcdGetString("output");
  matrix     = ajAcdGetString("matrix");
  outorder   = ajAcdGetString("outorder");
  ktup       = ajAcdGetInt("ktup");
  window     = ajAcdGetInt("window");
  gapopen    = ajAcdGetInt("gapopen");
  gapext     = ajAcdGetFloat("gapext");
  gapdist    = ajAcdGetInt("gapdist");
  endgaps    = ajAcdGetBoolean("endgaps");
  pairgap    = ajAcdGetInt("pairgap");
  topdiags   = ajAcdGetInt("topdiags");
  score      = ajAcdGetString("score");
  tossgaps   = ajAcdGetBoolean("tossgaps");
  kimura     = ajAcdGetBoolean("kimura");
  outputtree = ajAcdGetString("outputtree");
  tree       = ajAcdGetBoolean("tree");
  quicktree  = ajAcdGetBoolean("quicktree");
  align      = ajAcdGetBoolean("align");
  clustering = ajAcdGetString("clustering");
  numiter    = ajAcdGetInt("numiter");
  iteration  = ajAcdGetString("iteration");

  seqall = ajAcdGetSeqall("seqall");
  outf   = ajAcdGetOutfile("outfile");
  outf_dnd = ajAcdGetOutfile("dndoutfile");

  /* Copy the options into the SOAP request structure */
  params.alignment  = ajCharNewS(alignment);
  params.output     = ajCharNewS(output);
  params.matrix     = ajCharNewS(matrix);
  params.outorder   = ajCharNewS(outorder);
  params.ktup       = ktup;
  params.window     = window;
  params.gapopen    = gapopen;
  params.gapext     = gapext;
  params.gapdist    = gapdist;
  params.endgaps    = endgaps ? xsd__boolean__true_ : xsd__boolean__false_;
  params.pairgap    = pairgap;
  params.topdiags   = topdiags;
  params.score      = ajCharNewS(score);
  params.tossgaps   = tossgaps ? xsd__boolean__true_ : xsd__boolean__false_;
  params.kimura     = kimura ? xsd__boolean__true_ : xsd__boolean__false_;
  params.outputtree = ajCharNewS(outputtree);
  params.tree       = tree ? xsd__boolean__true_ : xsd__boolean__false_;
  params.quicktree  = quicktree ? xsd__boolean__true_ : xsd__boolean__false_;
  params.align      = align ? xsd__boolean__true_ : xsd__boolean__false_;
  params.clustering = ajCharNewS(clustering);
  params.numiter    = numiter;
  params.iteration  = ajCharNewS(iteration);

  AjPStr     tmp = NULL;
  AjPStr     tmpFileName = NULL;
  AjPSeqout  fil_file;
  AjPStr     line = NULL; /* must be initialised before ajReadline; the
                             original author noted the read fails otherwise */

  ajint      nb = 0;
  AjBool     are_prot = ajFalse;
  AjPFile    infile;

  /* Write all input sequences to a temporary FASTA file */
  tmp = ajStrNewC("fasta");

  fil_file = ajSeqoutNew();
  tmpFileName = getUniqueFileName();

  if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) {
    embExitBad();
  }

  ajSeqoutSetFormatS(fil_file, tmp);

  while (ajSeqallNext(seqall, &seq)) {
    if (!nb) {
      are_prot  = ajSeqIsProt(seq);
    }
    ajSeqoutWriteSeq(fil_file, seq);
    ++nb;
  }
  ajSeqoutClose(fil_file);
  ajSeqoutDel(&fil_file);

  if (nb < 2) {
    /* Do not leave the temporary file behind on this error path */
    ajSysFileUnlinkS(tmpFileName);
    ajFatal("Multiple alignments need at least two sequences");
  }

  /* Read the FASTA file back into a single string for the request */
  infile = ajFileNewInNameS(tmpFileName);
  if (!infile) {
    ajFatal("Cannot reopen temporary file %S", tmpFileName);
  }

  while (ajReadline(infile, &line)) {
    ajStrAppendS(&inseq,line);
    ajStrAppendC(&inseq,"\n");
  }
  ajFileClose(&infile);

  soap_init(&soap);

  char* in0;
  in0 = ajCharNewS(inseq);
  if ( soap_call_ns1__runClustalw( &soap, NULL, NULL, in0, &params, &jobid ) == SOAP_OK ) {
    fprintf(stderr,"Jobid: %s\n",jobid);
  } else {
    /* Without a job id there is nothing to poll or fetch */
    soap_print_fault(&soap, stderr);
    ajSysFileUnlinkS(tmpFileName);
    soap_destroy(&soap);
    soap_end(&soap);
    soap_done(&soap);
    embExitBad();
  }

  /* Poll until the service reports a non-zero status.
     NOTE(review): a persistent SOAP fault keeps this loop polling. */
  int check = 0;
  while ( check == 0 ) {
    if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid,  &check ) == SOAP_OK ) {
      fprintf(stderr,"*");
    } else {
      soap_print_fault(&soap, stderr);
    }
    sleep(3);
  }
  fprintf(stderr,"\n");

  char* type;

  /* Each result is copied into substr, emitted, and released again */
  type = "out";
  if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) {
    substr = ajStrNewC(result);
    fprintf(stdout, "%s\n", ajStrGetPtr(substr));
    ajStrDel(&substr);
  } else {
    soap_print_fault(&soap, stderr);
  }

  type = "aln";
  if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) {
    substr = ajStrNewC(result);
    ajFmtPrintF(outf,"%S\n",substr);
    ajStrDel(&substr);
  } else {
    soap_print_fault(&soap, stderr); 
  }

  type = "dnd";
  if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) {
    substr = ajStrNewC(result);
    ajFmtPrintF(outf_dnd,"%S\n",substr);
    ajStrDel(&substr);
  } else {
    soap_print_fault(&soap, stderr); 
  }


  ajSysFileUnlinkS(tmpFileName);

  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

  ajFileClose(&outf_dnd);

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&tmp);
  ajStrDel(&line);
  ajStrDel(&inseq);

  embExit();

  return 0;
}
Esempio n. 16
0
/* @funcstatic domainalign_stamp **********************************************
**
** Call STAMP and process files.
**
** Runs, in order, the external commands stamp, sorttrans, stamp, transform,
** poststamp and ver2hor, counts the "Cluster:" lines of the STAMP log to
** find the name of the final cluster file, passes the ver2hor output to
** domainalign_ProcessStampFile and finally removes the numbered STAMP
** files.  Every command line is echoed before it is executed.
**
** @param [r] prevdomain [AjPDomain] Previous domain.
** @param [r] domain [AjPDomain] This domain (not used here).
** @param [r] daf [AjPDirout] Domain alignment files (not used here).
** @param [r] super [AjPDirout] Superimposition files (not used here).
** @param [r] singlets [AjPDirout]  Singlet files (not used here).
** @param [r] align [AjPStr]   Align.
** @param [r] alignc [AjPStr] Alignc.
** @param [r] dom [AjPStr]   Dom.
** @param [r] name [AjPStr] Name.
** @param [r] set [AjPStr] Name of set file.
** @param [r] scan [AjPStr] Name of scan file.
** @param [r] sort [AjPStr] Name of sort file.
** @param [r] log [AjPStr] Log file name.
** @param [r] out [AjPStr] Out file name.
** @param [r] keepsinglets [AjBool] Keep singlet sequences or not
**                                  (not used here).
** @param [r] moden [ajint] Mode number (not used here).
** @param [r] noden [ajint] Node number.
** @param [r] nset [ajint] Number in set (not used here).
** @param [r] logf [AjPFile] Log file.
** 
**
** @return [void]
** @@
****************************************************************************/
static void domainalign_stamp(AjPDomain prevdomain,
			      AjPDomain domain, 
			      AjPDirout daf, 
			      AjPDirout super,
			      AjPDirout singlets, 
			      AjPStr    align, 
			      AjPStr    alignc, 
			      AjPStr    dom, 
			      AjPStr    name, 
			      AjPStr    set, 
			      AjPStr    scan, 
			      AjPStr    sort, 
			      AjPStr    log, 
			      AjPStr    out, 
			      AjBool    keepsinglets, 
			      ajint     moden, 
			      ajint     noden,
			      ajint     nset, 
			      AjPFile   logf)
{
    AjPStr    cmd       = ajStrNew();  /* Command line to be executed.    */
    AjPStr    logline   = ajStrNew();  /* One line of the STAMP log file. */
    AjPStr    tmpname   = ajStrNew();  /* Name of a file to remove.       */
    AjPRegexp clusterre = NULL;        /* Matches cluster lines in log.   */
    AjPFile   logfile   = NULL;        /* STAMP log file, opened to read. */
    ajint     nclust    = 0;           /* Number of clusters in the log.  */
    ajint     k         = 0;           /* Loop counter.                   */


    ajDebug("domainalign_stamp name: '%S'\n", name);

    /* Step 1: initial STAMP run. */
    ajFmtPrintS(&cmd, "stamp -l %S -s -n 2 -slide 5 -prefix %S -d %S",
                dom, name, set);
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysSystem(cmd);

    /* Step 2: sorttrans on the scan file, output captured in the sort file. */
    ajFmtPrintS(&cmd, "sorttrans -f %S -s Sc 2.5", scan);
    ajFmtPrint("\n%S > %S\n\n", cmd, sort);
    ajSysSystemOut(cmd, sort);

    /* Step 3: STAMP on the sorted file, output captured in the log file. */
    ajFmtPrintS(&cmd, "stamp -l %S -prefix %S", sort, name);
    ajFmtPrint("\n%S > %S\n\n", cmd, log);
    ajSysSystemOut(cmd, log);

    /* Step 4: transform the sorted file into the alignc file. */
    ajFmtPrintS(&cmd, "transform -f %S -g  -o %S", sort, alignc);
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysSystem(cmd);


    /* Count the number of clusters in the log file. */
    clusterre = ajRegCompC("^(Cluster:)");

    logfile = ajFileNewInNameS(log);
    if(!logfile)
        ajFatal("Could not open log file '%S'\n", log);

    nclust = 0;
    while(ajReadlineTrim(logfile, &logline))
    {
        if(ajRegExec(clusterre, logline))
            ++nclust;
    }
    ajFileClose(&logfile);

    ajDebug("ncluster: %d\n", nclust);


    /* Step 5: poststamp on the last cluster file ... calculate two fields
       for structural equivalence using threshold Pij value of 0.5, see
       stamp manual v4.1 pg 27. */
    ajFmtPrintS(&cmd, "poststamp -f %S.%d -min 0.5", name, nclust);
    ajFmtPrint("%S\n\n", cmd);
    ajSysSystem(cmd);

    /* Step 6: ver2hor ... convert block format alignment into clustal
       format, output captured in the out file. */
    ajFmtPrintS(&cmd, "ver2hor -f %S.%d.post", name, nclust);
    ajFmtPrint("%S > %S\n\n", cmd, out);
    ajSysSystemOut(cmd, out);


    /* Process STAMP alignment file and generate alignment file for output. */
    domainalign_ProcessStampFile(out, align, prevdomain, noden, logf);


    /* Remove the numbered cluster files and the .post file. */
    for(k = 1; k <= nclust; ++k)
    {
        ajFmtPrintS(&tmpname, "%S.%d", name, k);
        ajSysFileUnlink(tmpname);
    }

    ajFmtPrintS(&tmpname, "%S.%d.post", name, nclust);
    ajSysFileUnlink(tmpname);

    ajStrDel(&cmd);
    ajStrDel(&logline);
    ajStrDel(&tmpname);
    ajRegFree(&clusterre);

    return;
}   
Esempio n. 17
0
/*
** prosextract main program.
**
** Pass 1 reads prosite.dat from the PROSITE directory and, for every entry
** whose ID line ends in "PATTERN.", writes the id, accession, first word of
** the description, the concatenated pattern and its regular expression to
** the data file PROSITE/prosite.lines.
**
** Pass 2 reads prosite.doc and copies the text between {BEGIN} and {END}
** of each entry into a data file named PROSITE/<code>, once for each
** "{PS" line that heads the entry.
*/
int main(int argc, char **argv)
{
    AjPFile infdat = NULL;
    AjPFile infdoc = NULL;
    AjPFile outf   = NULL;
    AjPFile outs   = NULL;

    AjBool  haspattern;

    const char   *p;


    AjPStr line  = NULL;
    AjPStr text  = NULL;
    AjPStr dirname  = NULL;
    AjPStr filename = NULL;
    AjPStr id    = NULL;
    AjPStr ac    = NULL;
    AjPStr de    = NULL;
    AjPStr pa    = NULL;
    AjPStr fn    = NULL;
    AjPStr re    = NULL;
    AjPStr fname = NULL;
    AjBool flag;
    AjBool isopen;
    AjBool goback;

    ajlong storepos = 0L;


    embInit("prosextract", argc, argv);

    dirname = ajAcdGetDirectoryName("prositedir");

    line = ajStrNew();
    text = ajStrNew();

    id = ajStrNew();
    ac = ajStrNew();
    de = ajStrNew();
    pa = ajStrNew();


    fn=ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.dat");
    if(!(infdat=ajFileNewInNameS(fn)))
        ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);


    fn=ajStrNewC("PROSITE/prosite.lines");
    if(!(outf = ajDatafileNewOutNameS(fn)))
        ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);


    haspattern = ajFalse;

    while(ajReadlineTrim(infdat, &line) )
    {
        /* An ID line starts a new entry; only PATTERN entries are kept */
        if(ajStrPrefixC(line, "ID"))
        {
            haspattern = ajStrSuffixC(line,"PATTERN.");

            if(haspattern)
            {
                /*save id*/
                p = ajStrGetPtr(line);
                p = ajSysFuncStrtok(p," \t;");
                p = ajSysFuncStrtok(NULL," \t;");
                ajStrAssignC(&id,p);
                ajFmtPrintF(outf, "%S ", id);
            }

            continue;
        }

        if(!haspattern)
            continue;


        if(ajStrPrefixC(line, "AC") )
        {
            p = ajStrGetPtr(line);
            p = ajSysFuncStrtok(p, " \t;");
            p = ajSysFuncStrtok(NULL, " \t;");
            ajStrAssignC(&ac,p);
            ajFmtPrintF(outf, "%S\n ", ac);
            continue;
        }

        if(ajStrPrefixC(line, "DE") )
        {
            p = ajStrGetPtr(line);
            p = ajSysFuncStrtok(p, " \t.");
            p = ajSysFuncStrtok(NULL, " \t.");
            ajStrAssignC(&de,p);
            ajFmtPrintF(outf, "%S\n ", de);
            continue;
        }


        if(ajStrPrefixC(line, "PA"))
        {
            ajStrAssignC(&pa,"");

            /* A pattern may span several consecutive PA lines */
            while(ajStrPrefixC(line,"PA"))
            {
                p = ajStrGetPtr(line);
                p = ajSysFuncStrtok(p, " \t.");
                p = ajSysFuncStrtok(NULL, " \t.");
                ajStrAppendC(&pa,p);

                /* Stop at end of file instead of re-testing the old line */
                if(!ajReadlineTrim(infdat, &line))
                    break;
            }

            ajFmtPrintF(outf, "%S\n", pa);
            re = embPatPrositeToRegExp(pa);
            ajFmtPrintF(outf, "^%S\n\n", re);
            ajStrDel(&re);
            continue;
        }
    }


  /* Finished processing prosite.dat so look at prosite.doc */


    fn = ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.doc");
    if(!(infdoc=ajFileNewInNameS(fn)))
        ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);


    fname  = ajStrNewC("PROSITE/");
    flag   = ajFalse;
    isopen = ajFalse;
    goback = ajFalse;


    while(ajReadlineTrim(infdoc, &text))
    {
        /* A further "{PS" line while a file is open: remember to come
           back and write the same text again for that code */
        if(ajStrPrefixC(text, "{PS") && isopen && !goback)
            goback = ajTrue;

        if(ajStrPrefixC(text, "{PS") && !isopen)
        {
            storepos = ajFileResetPos(infdoc);
            /* save out the documentation text to acc numbered outfiles . */
            p = ajStrGetPtr(text)+1;
            p = ajSysFuncStrtok(p, ";");
            ajStrAssignS(&filename, fname);
            ajStrAppendC(&filename, p);

            if(!(outs = ajDatafileNewOutNameS(filename)))
                ajFatal("Cannot open file %S",filename);
            flag   = ajTrue;
            isopen = ajTrue;
            continue;
        }


        if(ajStrPrefixC(text, "{BEGIN}") && flag)
        {
            while(ajReadlineTrim(infdoc, &text))
            {
                if(ajStrPrefixC(text,"{END}"))
                    break;

                ajFmtPrintF(outs, "%S\n", text);
            }
            ajFileClose(&outs);
            isopen = ajFalse;

            /* Rewind to just after the first "{PS" line of this entry */
            if(goback)
            {
                goback = ajFalse;
                ajFileSeek(infdoc,storepos,0);
            }

        }
    }

    ajStrDel(&line);
    ajStrDel(&text);
    ajStrDel(&dirname);
    ajStrDel(&filename);

    ajStrDel(&id);
    ajStrDel(&ac);
    ajStrDel(&de);
    ajStrDel(&pa);
    ajStrDel(&re);
    ajStrDel(&fname);


    ajFileClose(&infdat);
    ajFileClose(&infdoc);
    ajFileClose(&outf);
    /* Still open only if the doc file ended before a {BEGIN} block */
    ajFileClose(&outs);

    embExit();

    return 0;
}
Esempio n. 18
0
/* Main program of DOMAINALIGN.
**
** Reads domain entries from the DCF input file and groups consecutive
** entries that share the same identifier at the user-selected node
** (SCOP nodes 1-4, CATH nodes 5-9) into families.  When a family is
** complete and has more than one member it is handed to
** domainalign_stamp() (MODE_STAMP) or domainalign_tcoffee() (any other
** mode); single-member families go to domainalign_keepsinglets() when the
** "keepsinglets" option is set.  Temporary files are removed at the end.
**
** Returns 0; unrecoverable problems are reported through ajFatal().
*/
int main(int argc, char **argv)
{
    ajint     famn        = 0;    /* Counter for the families.               */
    ajint     nset        = 0;    /* No. entries in family.                  */

    ajint     last_nodeid = 0;    /* Identifier of the last family that was
                                     processed.                              */
    AjPStr    last_node   = NULL; /* Last family that was processed.         */
    AjPStr    out         = NULL; /* Name of stamp alignment file.           */
    AjPStr    align       = NULL; /* Name of sequence alignment file.        */
    AjPStr    alignc      = NULL; /* Name of structure alignment file.       */
    AjPStr    log         = NULL; /* Name of STAMP log file.                 */
    AjPStr    dom         = NULL; /* Name of file containing single domain.  */
    AjPStr    set         = NULL; /* Name of file containing set of domains. */
    AjPStr    scan        = NULL; /* Name of temp. file used by STAMP.       */
    AjPStr    sort        = NULL; /* Name of temp. file used by STAMP.       */
    AjPStr    name        = NULL; /* Base name of STAMP temp files.          */
    AjPStr    pdbnames    = NULL; /* Names of domain pdb files to be passed
                                     to TCOFFEE.                             */
    AjPDir    pdb         = NULL; /* Path of domain coordinate files (pdb
                                     format input).                          */
    AjPDirout daf         = NULL; /* Path of sequence alignment files for
                                     output.                                 */
    AjPDirout super       = NULL; /* Path of structure alignment files for
                                     output.                                 */
    AjPDirout singlets    = NULL; /* Path of FASTA singlet sequence files
                                     for output.                             */

    AjPFile   dcfin       = NULL; /* File pointer for the DCF input file.    */
    AjPFile   domf        = NULL; /* File pointer for single domain file.    */
    AjPFile   setf        = NULL; /* File pointer for domain set file.       */
    AjPFile   logf        = NULL; /* Log file.                               */

    AjPDomain domain      = NULL; /* Pointer to domain structure.            */
    AjPDomain prevdomain  = NULL; /* Pointer to previous domain structure.   */

    ajint     type        = 0;    /* Type of domain (ajSCOP or ajCATH) in
                                     the DCF file.                           */

    AjPStr   *node        = NULL; /* Node of alignment.                      */
    ajint     noden       = 0;    /* 1: Class (SCOP), 2: Fold (SCOP) etc,
                                     see ACD file.                           */

    AjPStr   *mode        = NULL; /* Mode of operation from acd.             */
    ajint     moden       = 0;    /* Program mode, 1: MODE_STAMP,
                                     2: MODE_TCOFFEE.                        */
    AjBool    keepsinglets = ajFalse; /* Whether to retain sequences of
                                         singlet families and write them to
                                         an output file.                     */

    AjPStr    temp        = NULL; /* A temporary string.                     */


    /* Initialise strings etc. */
    last_node = ajStrNew();
    out       = ajStrNew();
    align     = ajStrNew();
    alignc    = ajStrNew();
    log       = ajStrNew();
    dom       = ajStrNew();
    set       = ajStrNew();
    scan      = ajStrNew();
    sort      = ajStrNew();
    name      = ajStrNew();
    temp      = ajStrNew();
    pdbnames  = ajStrNew();


    /* Read data from acd. */
    embInitPV("domainalign",argc,argv,"DOMALIGN",VERSION);

    dcfin         = ajAcdGetInfile("dcfinfile");
    pdb           = ajAcdGetDirectory("pdbdir");
    daf           = ajAcdGetOutdir("dafoutdir");
    super         = ajAcdGetOutdir("superoutdir");
    singlets      = ajAcdGetOutdir("singletsoutdir");
    node          = ajAcdGetList("node");
    mode          = ajAcdGetList("mode");
    keepsinglets  = ajAcdGetToggle("keepsinglets");
    logf          = ajAcdGetOutfile("logfile");


    /* Convert the selected node and mode to an integer. */
    if(!(ajStrToInt(node[0], &noden)))
        ajFatal("Could not parse ACD node option");
    if(!(ajStrToInt(mode[0], &moden)))
        ajFatal("Could not parse ACD mode option");


    /* Initialise random number generator for naming of temp. files. */
    ajRandomSeed();
    ajFilenameSetTempname(&name);


    /* Create names for temp. files: all share the same base name. */
    ajStrAssignS(&log, name);
    ajStrAppendC(&log, ".log");
    ajStrAssignS(&dom, name);
    ajStrAppendC(&dom, ".dom");
    ajStrAssignS(&set, name);
    ajStrAppendC(&set, ".set");
    ajStrAssignS(&scan, name);
    ajStrAppendC(&scan, ".scan");
    ajStrAssignS(&sort, name);
    ajStrAppendC(&sort, ".sort");
    ajStrAssignS(&out, name);
    ajStrAppendC(&out, ".out");


    /* Initialise last_node with something that is not in SCOP. */
    ajStrAssignC(&last_node,"!!!!!");


    /* Open STAMP domain set file. */
    if(moden == MODE_STAMP)
    {
        if(!(setf=ajFileNewOutNameS(set)))
            ajFatal("Could not open domain set file\n");
    }


    /* Get domain type. */
    type = ajDomainDCFType(dcfin);


    /* Start of main application loop. */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
        /* A new family: the identifier at the selected node differs from
           the one recorded for the previous domain. */
        if(((domain->Type == ajSCOP) &&
            (((noden==1) && (last_nodeid != domain->Scop->Sunid_Class))      ||
             ((noden==2) && (last_nodeid != domain->Scop->Sunid_Fold))       ||
             ((noden==3) && (last_nodeid != domain->Scop->Sunid_Superfamily))||
             ((noden==4) && (last_nodeid != domain->Scop->Sunid_Family))))   ||
           ((domain->Type == ajCATH) &&
            (((noden==5) && (last_nodeid != domain->Cath->Class_Id))         ||
             ((noden==6) && (last_nodeid != domain->Cath->Arch_Id))          ||
             ((noden==7) && (last_nodeid != domain->Cath->Topology_Id))      ||
             ((noden==8) && (last_nodeid != domain->Cath->Superfamily_Id))   ||
             ((noden==9) && (last_nodeid != domain->Cath->Family_Id)))))
        {
            /* If we have done the first family, process the family that
               has just been completed. */
            if(famn)
            {
                /* Create the output file names for the alignment of the
                   previous family. */
                domainalign_writeid(prevdomain, noden, daf, super,
                                    &align, &alignc);

                if(moden == MODE_STAMP)
                {
                    /* Close domain set file. */
                    ajFileClose(&setf);

                    /* Call STAMP for a family with 2 or more entries. */
                    if(nset > 1)
                    {
                        domainalign_stamp(prevdomain,
                                          domain,
                                          daf,
                                          super,
                                          singlets,
                                          align,
                                          alignc,
                                          dom,
                                          name,
                                          set,
                                          scan,
                                          sort,
                                          log,
                                          out,
                                          keepsinglets,
                                          moden,
                                          noden,
                                          nset,
                                          logf);
                    }
                    else if(keepsinglets) /* Singlet family. */
                        domainalign_keepsinglets(prevdomain, noden,
                                                 singlets, logf);

                    /* Re-open STAMP domain set file for the new family. */
                    if(!(setf=ajFileNewOutNameS(set)))
                        ajFatal("Could not open domain set file\n");
                }
                else
                {
                    /* Call TCOFFEE. */
                    if(nset > 1)
                        domainalign_tcoffee(prevdomain, out, align,
                                            alignc, pdbnames, noden, logf);
                    else if(keepsinglets) /* Singlet family. */
                        domainalign_keepsinglets(prevdomain, noden,
                                                 singlets, logf);
                }

                /* Set the number of members of the new family to zero. */
                nset = 0;

                /* Clear TCOFFEE argument. */
                ajStrSetClear(&pdbnames);
            }


            /* Open, write and close STAMP domain file. */
            if(moden == MODE_STAMP)
            {
                if(!(domf=ajFileNewOutNameS(dom)))
                    ajFatal("Could not open domain file\n");
                ajStrAssignS(&temp, ajDomainGetId(domain));
                ajStrFmtLower(&temp);
                ajFmtPrintF(domf, "%S %S { ALL }\n", temp, temp);
                ajFileClose(&domf);
            }


            /* Copy current family name to last_node. */
            domainalign_writelast(domain, noden, &last_node, &last_nodeid);

            /* Copy current domain pointer to prevdomain. */
            ajDomainDel(&prevdomain);
            prevdomain=NULL;
            ajDomainCopy(&prevdomain, domain);

            /* Increment family counter. */
            famn++;
        }


        ajStrAssignS(&temp, ajDomainGetId(domain));
        ajStrFmtLower(&temp);

        /* Write STAMP domain set file. */
        if(moden == MODE_STAMP)
            ajFmtPrintF(setf, "%S %S { ALL }\n", temp, temp);
        /* Write TCOFFEE argument. */
        else
        {
            ajStrAppendS(&pdbnames, ajDirGetPath(pdb));
            ajStrAppendS(&pdbnames, temp);
            ajStrAppendC(&pdbnames, ".");
            ajStrAppendS(&pdbnames, ajDirGetExt(pdb));
            ajStrAppendC(&pdbnames, " ");
        }

        ajDomainDel(&domain);

        /* Increment number of members in family. */
        nset++;
    }

    /* End of main application loop.  The loop exits with domain == NULL;
       alias it to prevdomain so the final tidy-up frees the copy. */
    domain=prevdomain;


    ajFmtPrint("\nProcessing node %d\n", last_nodeid);


    /* Code to process last family.  prevdomain is still NULL when no
       family was ever started (e.g. an empty DCF file), in which case
       there is nothing to align and the helpers must not be called. */
    if(prevdomain)
        domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc);

    if(moden == MODE_STAMP)
    {
        /* Close domain set file. */
        ajFileClose(&setf);

        if(prevdomain)
        {
            if(nset > 1)
            {
                domainalign_stamp(prevdomain,
                                  domain,
                                  daf,
                                  super,
                                  singlets,
                                  align,
                                  alignc,
                                  dom,
                                  name,
                                  set,
                                  scan,
                                  sort,
                                  log,
                                  out,
                                  keepsinglets,
                                  moden,
                                  noden,
                                  nset,
                                  logf);
            }
            else if(keepsinglets) /* Singlet family. */
                domainalign_keepsinglets(prevdomain, noden, singlets, logf);
        }
    }
    else if(prevdomain)
    {
        /* Call TCOFFEE. */
        if(nset > 1)
            domainalign_tcoffee(prevdomain, out, align, alignc,
                                pdbnames, noden, logf);
        else if(keepsinglets) /* Singlet family. */
            domainalign_keepsinglets(prevdomain, noden, singlets, logf);
    }


    /* Remove all temporary files. */
    ajSysFileUnlink(log);
    ajSysFileUnlink(dom);
    ajSysFileUnlink(set);
    ajSysFileUnlink(scan);
    ajSysFileUnlink(sort);
    ajSysFileUnlink(out);
    ajStrAssignS(&temp, name);
    ajStrAppendC(&temp, ".mat");
    ajSysFileUnlink(temp);


    /* Tidy up. */
    ajDomainDel(&domain);
    ajFileClose(&dcfin);
    ajStrDel(&last_node);
    ajStrDel(&log);
    ajStrDel(&dom);
    ajStrDel(&set);
    ajStrDel(&scan);
    ajStrDel(&sort);
    ajStrDel(&name);
    ajStrDel(&out);
    ajStrDel(&align);
    ajStrDel(&alignc);
    ajStrDel(&pdbnames);
    ajDirDel(&pdb);
    ajDiroutDel(&daf);
    ajDiroutDel(&super);
    ajDiroutDel(&singlets);
    ajStrDel(&temp);
    ajStrDel(&node[0]);
    AJFREE(node);
    ajStrDel(&mode[0]);
    AJFREE(mode);
    ajFileClose(&logf);

    ajExit();
    return 0;
}
Esempio n. 19
0
/* Run an SQL statement through the adaptor's database adaptor and turn
** every result row into an Assembly Exception object appended to 'aes'.
**
** Each row is read column by column in the order: identifier, sequence
** region id/start/end, type string, exception region id/start/end,
** orientation.  A type string that ensAssemblyexceptionTypeFromStr()
** maps to a zero value is reported via ajFatal().
**
** Returns ajFalse if any argument is NULL, ajTrue otherwise.
*/
static AjBool assemblyexceptionadaptorFetchAllBySQL(
    EnsPAssemblyexceptionadaptor aea,
    const AjPStr statement,
    AjPList aes)
{
    AjPSqlstatement stmt = NULL;
    AjISqlrow rowiter    = NULL;

    if(!aea || !statement || !aes)
        return ajFalse;

    stmt    = ensDatabaseadaptorSqlstatementNew(aea->Adaptor, statement);
    rowiter = ajSqlrowiterNew(stmt);

    for(; !ajSqlrowiterDone(rowiter); )
    {
        /* Per-row values, reset for every row. */
        ajuint rowid    = 0;
        ajuint seqid    = 0;
        ajuint seqstart = 0;
        ajuint seqend   = 0;
        ajuint excid    = 0;
        ajuint excstart = 0;
        ajuint excend   = 0;
        ajint  orient   = 0;

        AjPStr typetext = ajStrNew();

        EnsEAssemblyexceptionType aetype = ensEAssemblyexceptionTypeNULL;

        EnsPAssemblyexception exception = NULL;

        AjPSqlrow row = ajSqlrowiterGet(rowiter);

        /* Columns are consumed strictly in result-set order. */
        ajSqlcolumnToUint(row, &rowid);
        ajSqlcolumnToUint(row, &seqid);
        ajSqlcolumnToUint(row, &seqstart);
        ajSqlcolumnToUint(row, &seqend);
        ajSqlcolumnToStr(row, &typetext);
        ajSqlcolumnToUint(row, &excid);
        ajSqlcolumnToUint(row, &excstart);
        ajSqlcolumnToUint(row, &excend);
        ajSqlcolumnToInt(row, &orient);

        /* Translate the type string into the enumeration. */
        aetype = ensAssemblyexceptionTypeFromStr(typetext);

        if(!aetype)
            ajFatal("assemblyexceptionadaptorFetchAllBySQL "
                    "got unexpected Assembly Exception type '%S' "
                    "from database.\n",
                    typetext);

        exception = ensAssemblyexceptionNew(aea,
                                            rowid,
                                            seqid,
                                            seqstart,
                                            seqend,
                                            excid,
                                            excstart,
                                            excend,
                                            orient,
                                            aetype);

        ajListPushAppend(aes, (void *) exception);

        ajStrDel(&typetext);
    }

    ajSqlrowiterDel(&rowiter);

    ensDatabaseadaptorSqlstatementDel(aea->Adaptor, &stmt);

    return ajTrue;
}
Esempio n. 20
0
/* Read restriction enzyme entries from the data file
** "REBASE/embossre.enz" into a newly created list of PRinfo records.
**
** relist   receives the new list; one PRinfo is pushed per accepted entry.
** enzymes  comma-separated enzyme codes.  NULL, or a first element that
**          matches "all" (per ajStrMatchCaseC), selects every entry;
**          otherwise only entries whose code matches one of the listed
**          names (per ajStrMatchCaseS) are kept.  A string that yields no
**          names selects nothing.
**
** Returns the number of entries pushed onto *relist.  Aborts through
** ajFatal() if the data file cannot be opened.
*/
static ajint silent_restr_read(AjPList *relist,const AjPStr enzymes)
{
    EmbPPatRestrict rptr = NULL;
    AjPFile fin = NULL;

    AjPStr refilename = NULL;
    ajint RStotal = 0;
    PRinfo rinfo = NULL;
    AjBool isall = ajFalse;
    ajint ne = 0;
    ajint i;
    AjPStr *ea = NULL;

    refilename = ajStrNewC("REBASE/embossre.enz");
    rptr       = embPatRestrictNew();
    *relist    = ajListNew();

    fin = ajDatafileNewInNameS(refilename);
    if(!fin)
        ajFatal("Aborting...restriction file '%S' not found", refilename);

    /* Parse the user-selected enzyme list */
    if(!enzymes)
        isall = ajTrue;
    else
    {
        ne = ajArrCommaList(enzymes,&ea);
        for(i=0;i<ne;++i)
            ajStrRemoveWhite(&ea[i]);

        /* Only inspect ea[0] when at least one name was parsed; with an
           empty list there is no first element to read. */
        if(ne > 0 && ajStrMatchCaseC(ea[0],"all"))
            isall = ajTrue;
        else
            isall = ajFalse;
    }

    while(!ajFileIsEof(fin))
    {
        if(!embPatRestrictReadEntry(rptr,fin))
            continue;

        if(!isall)
        {
            /* Skip entries whose code is not in the requested list. */
            for(i=0;i<ne;++i)
            {
                if(ajStrMatchCaseS(ea[i],rptr->cod))
                    break;
            }

            if(i==ne)
                continue;
        }

        AJNEW(rinfo);
        /* reading in RE info into rinfo from EmbPPatRestrict structure */
        rinfo->code    = ajStrNewS(rptr->cod);
        rinfo->site    = ajStrNewS(rptr->pat);
        rinfo->revsite = ajStrNewS(rptr->pat);
        ajSeqstrReverse(&rinfo->revsite);
        rinfo->ncuts = rptr->ncuts;
        rinfo->cut1  = rptr->cut1;
        rinfo->cut2  = rptr->cut2;
        rinfo->cut3  = rptr->cut3;
        rinfo->cut4  = rptr->cut4;
        ajListPush(*relist,(void *)rinfo);
        RStotal++;
    }

    for(i=0;i<ne;++i)
        ajStrDel(&ea[i]);
    AJFREE(ea);

    embPatRestrictDel(&rptr);
    ajFileClose(&fin);
    ajStrDel(&refilename);

    return RStotal;
}
Esempio n. 21
0
/* Main program of CUTGEXTRACT.
**
** Scans every file in the input directory list whose base name matches
** the "wildspec" wildcard, accumulates codon counts per key (the
** "filename" option when non-empty, otherwise the entry name returned by
** cutgextract_next()) and finally writes one codon usage file per key
** under "CODONS/", logging a summary line for each to the output file.
*/
int main(int argc, char **argv)
{
    /* Codon order used by the Count array; aa[] below is parallel. */
    const char *codons[]=
    {
	"TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */
	"TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */
	"GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */
	"AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */
	"ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */
	"CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */
	"TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */
	"ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC"	 /* 56-63 */
    };

    const char *aa=
	"***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY";

    AjPFile infile      = NULL;
    AjPFile cutfile     = NULL;
    AjPFile logfile     = NULL;
    char *entryname     = NULL;

    AjPStr cutname      = NULL;
    AjPStr key          = NULL;
    AjPStr lookupkey    = NULL;
    AjPStr path         = NULL;
    AjPStr basename     = NULL;
    AjPStr wildfile     = NULL;
    AjPStr wildspecies  = NULL;
    AjPStr division     = NULL;
    AjPStr release      = NULL;
    AjPStr docstr       = NULL;
    AjPStr species      = NULL;
    AjPStr filename     = NULL;
    AjBool allrecords   = AJFALSE;

    AjPTable table      = NULL;
    AjPList filelist    = NULL;
    AjPStr *keyarray    = NULL;
    CutgPValues *valarray = NULL;
    CutgPValues value   = NULL;
    AjPCod codon        = NULL;

    ajint entry         = 0;
    ajint cod           = 0;
    ajint stop          = 0;
    ajint codidx        = 0;
    ajint total         = 0;
    ajint nstops;
    ajint savecount[3];
    char residue;

    embInit("cutgextract",argc,argv);

    lookupkey = ajStrNew();
    cutname   = ajStrNew();

    table = ajTablestrNewLen(TABLE_ESTIMATE);

    filelist    = ajAcdGetDirlist("directory");
    wildfile    = ajAcdGetString("wildspec");
    release     = ajAcdGetString("release");
    logfile     = ajAcdGetOutfile("outfile");
    wildspecies = ajAcdGetString("species");
    filename    = ajAcdGetString("filename");
    allrecords  = ajAcdGetBoolean("allrecords");

    ajStrInsertC(&release, 0, "CUTG");
    ajStrRemoveWhite(&release);

    /* Pass 1: accumulate counts from every matching input file. */
    while(ajListPop(filelist,(void **)&path))
    {
	ajStrAssignS(&basename, path);
	ajFilenameTrimPath(&basename);
	ajDebug("Testing file '%S'\n", path);

	if(!ajStrMatchWildS(basename,wildfile))
	{
	    ajStrDel(&path);
	    continue;
	}

	ajDebug("... matched wildcard '%S'\n", wildfile);

	infile = ajFileNewInNameS(path);
	if(!infile)
	    ajFatal("cannot open file %S",path);

	ajFmtPrintS(&division, "%F", infile);
	ajFilenameTrimAll(&division);

	while((entryname = cutgextract_next(infile, wildspecies,
					    &species, &docstr)))
	{
	    /* A user-supplied file name overrides the per-entry key. */
	    if(ajStrGetLen(filename))
		ajStrAssignS(&lookupkey,filename);
	    else
		ajStrAssignC(&lookupkey,entryname);

	    /* Create the table record on first sight of this key. */
	    value = ajTableFetch(table,lookupkey);
	    if(!value)
	    {
		key = ajStrNewS(lookupkey);
		AJNEW0(value);
		ajStrAssignS(&value->Species,species);
		ajStrAssignS(&value->Division, division);
		ajTablePut(table,(void *)key,(void *)value);
	    }

	    /* Remember the first three counts so the warning below can
	       report how much this record added to each. */
	    for(stop=0;stop<3;stop++)
		savecount[stop] = value->Count[stop];

	    nstops = cutgextract_readcodons(infile,allrecords, value->Count);

	    if(nstops < 1)
		value->Skip++;
	    else
	    {
		/* One CDS per stop codon found. */
		value->CdsCount += nstops;

		if(nstops > 1)
		{
		    value->Warn++;
		    ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'",
			   nstops,
			   value->Count[0] - savecount[0],
			   value->Count[1] - savecount[1],
			   value->Count[2] - savecount[2],
			   cutgextractSavepid);
		}
	    }
	}

	ajStrDel(&path);
	ajFileClose(&infile);
    }

    /* Pass 2: write one codon usage file per table record. */
    ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray);

    for(entry=0; keyarray[entry]; entry++)
    {
	key   = keyarray[entry];
	value = (CutgPValues) valarray[entry];
	codon = ajCodNew();
	total = 0;

	for(cod=0;cod<CODONS;++cod)
	{
	    total += value->Count[cod];
	    codidx = ajCodIndexC(codons[cod]);
	    codon->num[codidx] = value->Count[cod];

	    residue = aa[cod];
	    codon->aa[codidx] = (residue=='*') ? 27 : residue-'A';
	}

	ajCodCalcUsage(codon,total);

	ajStrAppendC(&key, ".cut");

	/* Log line: warnings are reported with allrecords, skipped
	   records without it; otherwise just the CDS count. */
	if(allrecords && value->Warn)
	    ajFmtPrintF(logfile, "Writing %S CDS: %d Warnings: %d\n",
			key, value->CdsCount, value->Warn);
	else if(!allrecords && value->Skip)
	    ajFmtPrintF(logfile, "Writing %S CDS: %d Skipped: %d\n",
			key, value->CdsCount, value->Skip);
	else
	    ajFmtPrintF(logfile, "Writing %S CDS: %d\n",
			key, value->CdsCount);

	ajFmtPrintS(&cutname,"CODONS/%S",key);
	cutfile = ajDatafileNewOutNameS(cutname);
	if(!cutfile)
	    ajFatal("Cannot open output file %S",cutname);

	ajCodSetNameS(codon, key);
	ajCodSetSpeciesS(codon, value->Species);
	ajCodSetDivisionS(codon, value->Division);
	ajCodSetReleaseS(codon, release);
	ajCodSetNumcds(codon, value->CdsCount);
	ajCodSetNumcodons(codon, total);

	ajCodWrite(codon, cutfile);
	ajFileClose(&cutfile);

	/* Release the key and record now that they have been written. */
	ajStrDel(&key);
	ajStrDel(&value->Division);
	ajStrDel(&value->Doc);
	ajStrDel(&value->Species);
	AJFREE(value);
	ajCodDel(&codon);
    }

    AJFREE(keyarray);
    AJFREE(valarray);

    ajTableFree(&table);
    ajListFree(&filelist);
    ajStrDel(&wildfile);
    ajStrDel(&release);
    ajStrDel(&wildspecies);
    ajStrDel(&filename);
    ajFileClose(&logfile);

    ajStrDel(&cutgextractSavepid);
    ajStrDel(&cutgextractLine);
    ajStrDel(&cutgextractOrg);

    ajStrDel(&cutname);
    ajStrDel(&lookupkey);
    ajStrDel(&species);
    ajStrDel(&docstr);
    ajStrDel(&division);
    ajStrDel(&basename);

    embExit();

    return 0;
}
Esempio n. 22
0
/* Main program of OHMMALIGN (EMBOSS wrapper around HMMER hmmalign).
**
** Reads one HMM from the "hmmfile" option, aligns every sequence of the
** "sequences" set to it with Viterbi, optionally merges in the alignments
** named by "mapalifile" / "withalifile", and writes the resulting
** multiple alignment in SELEX format to the "outfile" name (or to stdout
** if that file cannot be opened with fopen()).
*/
int main(int argc, char **argv) 
{
  const char      *hmmfile;	/* file to read HMMs from                  */
  HMMFILE         *hmmfp;       /* opened hmmfile for reading              */
  const char      *seqfile;     /* file to read target sequence from       */ 
  char           **rseq;        /* raw, unaligned sequences                */ 
  SQINFO          *sqinfo;      /* info associated with sequences          */
  char           **dsq;         /* digitized raw sequences                 */
  int              nseq;        /* number of sequences                     */  
  char           **aseq;        /* aligned sequences                       */
  AINFO            ainfo;       /* alignment information                   */
  float           *wgt;         /* per-sequence weights                    */
  int              i;
  struct plan7_s    *hmm;       /* HMM to align to                         */ 
  struct p7trace_s **tr;        /* traces for aligned sequences            */

  int   be_quiet;		/* TRUE to suppress verbose banner          */
  int   matchonly;		/* TRUE to show only match state syms       */
  const char *outfile;          /* optional alignment output file           */
  FILE *ofp;                    /* handle on alignment output file          */
  AjPFile ajwithali;          /* additional alignment file to align         */
  AjPFile ajmapali;           /* additional alignment file to map           */
  AjBool ajmatch=ajFalse;
  AjPFile outf=NULL;
  AjPStr  outfname=NULL;
  AjPFile inf=NULL;
  AjPStr  infname=NULL;
  AjPSeqset seqset=NULL;
  AjPStr  ajseqfile=NULL;
  char*  mapali=NULL;           /* name of ajmapali, owned by this function */
  char*  withali=NULL;          /* name of ajwithali, owned by this function */
  
#ifdef MEMDEBUG
  unsigned long histid1, histid2, orig_size, current_size;
  orig_size = malloc_inuse(&histid1);
  fprintf(stderr, "[... memory debugging is ON ...]\n");
#endif

  /*********************************************** 
   * Parse command line
   ***********************************************/
  
  matchonly = FALSE;
  outfile   = NULL;
  be_quiet  = FALSE;
  withali   = NULL;
  mapali    = NULL;

  embInitPV("ohmmalign",argc,argv,"HMMER",VERSION);

  ajmatch = ajAcdGetBoolean("matchonly");
  matchonly = ajmatch ? TRUE : FALSE;

  /* Only the file names are needed: HMMER re-opens the files itself,
   * so copy each name and close the ACD file object straight away.
   */
  ajmapali = ajAcdGetInfile("mapalifile");
  if (ajmapali)
      mapali = ajCharNewS(ajFileGetNameS(ajmapali));
  ajFileClose(&ajmapali);
  ajwithali = ajAcdGetInfile("withalifile");
  if (ajwithali)
      withali = ajCharNewS(ajFileGetNameS(ajwithali));
  ajFileClose(&ajwithali);

  be_quiet=TRUE;

  outf = ajAcdGetOutfile("outfile");
  outfname = ajStrNewC((char *)ajFileGetNameC(outf));
  if(*ajStrGetPtr(outfname)>31)
      ajFileClose(&outf);
  outfile = ajStrGetPtr(outfname);

  inf = ajAcdGetInfile("hmmfile");
  infname = ajStrNewC((char *)ajFileGetNameC(inf));
  ajFileClose(&inf);
  hmmfile = ajStrGetPtr(infname);

  
  seqset = ajAcdGetSeqset("sequences");
  ajseqfile = ajStrNewC(ajStrGetPtr(seqset->Filename));
  seqfile = ajStrGetPtr(ajseqfile);
  

 /*********************************************** 
  * Open HMM file (might be in HMMERDB or current directory).
  * Read a single HMM from it.
  * 
  * Currently hmmalign disallows the J state and
  * only allows one domain per sequence. To preserve
  * the S/W entry information, the J state is explicitly
  * disallowed, rather than calling a Plan7*Config() function.
  * this is a workaround in 2.1 for the 2.0.x "yo!" bug.
  ***********************************************/

  if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
    ajFatal("Failed to open HMM file %s\n", hmmfile);
  if (!HMMFileRead(hmmfp, &hmm)) 
    ajFatal("Failed to read any HMMs from %s\n", hmmfile);
  HMMFileClose(hmmfp);
  if (hmm == NULL) 
    ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);
  hmm->xt[XTE][MOVE] = 1.;	      /* only 1 domain/sequence ("global" alignment) */
  hmm->xt[XTE][LOOP] = 0.;
  P7Logoddsify(hmm, TRUE);
				/* do we have the map we might need? */
  if (mapali != NULL && ! (hmm->flags & PLAN7_MAP))
    ajFatal("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile);

  /*********************************************** 
   * Open sequence file in current directory.
   * Read all seqs from it.
   ***********************************************/
/*
  if (! SeqfileFormat(seqfile, &format, NULL))
    switch (squid_errno) {
    case SQERR_NOFILE: 
      ajFatal("Sequence file %s could not be opened for reading", seqfile);
    case SQERR_FORMAT: 
    default:           
      ajFatal("Failed to determine format of sequence file %s", seqfile);
    }
  if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq))
    ajFatal("Failed to read any sequences from file %s", seqfile);
*/

  /* The sequences come from the EMBOSS sequence set instead. */
  emboss_rseqs(seqset,&rseq,&sqinfo,&nseq);

  /*********************************************** 
   * Show the banner
   ***********************************************/

  be_quiet=TRUE;
  if (! be_quiet) 
    {
/*      Banner(stdout, banner); */
      printf(   "HMM file:             %s\n", hmmfile);
      printf(   "Sequence file:        %s\n", seqfile);
      printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n");
    }

  /*********************************************** 
   * Do the work
   ***********************************************/

  /* Allocations and initializations.
   */
  dsq = MallocOrDie(sizeof(char *) * nseq);
  tr  = MallocOrDie(sizeof(struct p7trace_s *) * nseq);

  /* Align each sequence to the model, collect traces
   */
  for (i = 0; i < nseq; i++)
    {
      dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len);

      if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT)
	(void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i]));
      else
	(void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i]));
    }

  /* Include an aligned alignment, if desired.
   */
  if (mapali != NULL)
    include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq);
  if (withali != NULL) 
    include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq);

  /* Turn traces into a multiple alignment
   */ 
  wgt = MallocOrDie(sizeof(float) * nseq);
  FSet(wgt, nseq, 1.0);
  P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly,
		     &aseq, &ainfo);

  /*********************************************** 
   * Output the alignment
   ***********************************************/

  if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL)
    {
      WriteSELEX(ofp, aseq, &ainfo, 50);
      printf("Alignment saved in file %s\n", outfile);
      fclose(ofp);
    }
  else
    WriteSELEX(stdout, aseq, &ainfo, 50);

  /*********************************************** 
   * Cleanup and exit
   ***********************************************/
  
  for (i = 0; i < nseq; i++) 
    {
      P7FreeTrace(tr[i]);
      FreeSequence(rseq[i], &(sqinfo[i]));
      free(dsq[i]);
    }
  FreeAlignment(aseq, &ainfo);
  FreePlan7(hmm);
  free(sqinfo);
  free(rseq);
  free(dsq);
  free(wgt);
  free(tr);

  SqdClean();

  ajStrDel(&outfname);
  ajStrDel(&infname);
  ajStrDel(&ajseqfile);

  /* Release the file-name copies made with ajCharNewS(). */
  ajCharDel(&mapali);
  ajCharDel(&withali);
  

#ifdef MEMDEBUG
  current_size = malloc_inuse(&histid2);
  if (current_size != orig_size) malloc_list(2, histid1, histid2);
  else fprintf(stderr, "[No memory leaks.]\n");
#endif

  ajSeqsetDel(&seqset);
  ajFileClose(&ajwithali);
  ajFileClose(&ajmapali);

  embExit();
  
  return 0;
}
Esempio n. 23
0
/* @prog ehmmbuild ************************************************************
**
** EMBOSS wrapper for the HMMER 'hmmbuild' program.  Reads the ACD options,
** checks that the input alignment is in a format named in the accepted list
** below, assembles the equivalent hmmbuild command line, prints it and
** executes it.
**
** @param [r] argc [int] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] Always 0
**
******************************************************************************/
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPSeqset alignfile = NULL;
    AjPFile       prior = NULL;
    AjPFile        null = NULL;
    AjPFile         pam = NULL;
    float        pamwgt = 0.0;
    AjPStr         nhmm = NULL;
    AjPStr     strategy = NULL;
    ajint      pbswitch = 0;
    float       archpri = 0.0;
    AjBool      binary  = ajFalse;
    AjBool         fast = ajFalse;
    float        gapmax = 0.0;
    AjBool         hand = ajFalse;
    float       idlevel = 0.0;
    AjBool        noeff = ajFalse;
    float       swentry = 0.0;
    float        swexit = 0.0;
    AjBool    verbosity = ajFalse;
    AjPStr    weighting = NULL;
    AjPFile     hmmfile = NULL;
    AjPFile           o = NULL;
    AjPFile       cfile = NULL;

    /* Housekeeping variables */
    AjPStr          cmd = NULL;
    AjPStr          fmt = NULL;
    char         option;
    AjBool        fmtok = ajFalse;
    AjPStr  hmmfilename = NULL;


    /* ACD file processing */
    embInitPV("ehmmbuild",argc,argv,"HMMERNEW",VERSION);

    alignfile = ajAcdGetSeqset("alignfile");
    prior     = ajAcdGetInfile("prior");
    null      = ajAcdGetInfile("null");
    pam       = ajAcdGetInfile("pam");
    pamwgt    = ajAcdGetFloat("pamwgt");
    nhmm      = ajAcdGetString("nhmm");
    strategy  = ajAcdGetListSingle("strategy");
    pbswitch  = ajAcdGetInt("pbswitch");
    archpri   = ajAcdGetFloat("archpri");
    binary    = ajAcdGetBoolean("binary");
    fast      = ajAcdGetBoolean("fast");
    gapmax    = ajAcdGetFloat("gapmax");
    hand      = ajAcdGetBoolean("hand");
    /* NOTE(review): "sidlevel" is the only qualifier whose name differs from
       its variable and from the --idlevel option it feeds; confirm against
       the ACD file before renaming. */
    idlevel   = ajAcdGetFloat("sidlevel");
    noeff     = ajAcdGetBoolean("noeff");
    swentry   = ajAcdGetFloat("swentry");
    swexit    = ajAcdGetFloat("swexit");
    verbosity = ajAcdGetBoolean("verbosity");
    weighting = ajAcdGetListSingle("weighting");
    hmmfile   = ajAcdGetOutfile("hmmfile");
    o         = ajAcdGetOutfile("o");
    cfile     = ajAcdGetOutfile("cfile");


    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd  = ajStrNew();
    fmt  = ajStrNew();
    hmmfilename = ajStrNew();

    /* Keep a copy of the output HMM file name: the file object itself is
       closed (step 4) before hmmbuild is run. */
    ajStrAssignC(&hmmfilename, ajFileGetNameC(hmmfile));


    /* 2. Ensure alignfile is in format HMMER can understand.  These include
       FASTA, GENBANK,EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL and PHYLIP.
       EMBOSS name definitions are taken from seqInFormatDef in ajseqread.c and
       seqOutFormat in ajseqwrite.c */
    ajStrAssignS(&fmt, ajSeqsetGetFormat(alignfile));
    if(ajStrMatchC(fmt, "fasta")    ||
       ajStrMatchC(fmt, "genbank")  ||
       ajStrMatchC(fmt, "embl")     ||
       ajStrMatchC(fmt, "gcg")      ||
       ajStrMatchC(fmt, "pir")      ||
       ajStrMatchC(fmt, "stockholm")||
       ajStrMatchC(fmt, "selex")    ||
       ajStrMatchC(fmt, "msf")      ||
       ajStrMatchC(fmt, "clustal")  ||
       ajStrMatchC(fmt, "phylip"))
        fmtok = ajTrue;

    /* This could be replaced with code to reformat the file. */
    if(!fmtok)
        ajFatal("Input alignment ('alignfile' ACD option) is not in format "
                "HMMER understands. Please use a file in FASTA, GENBANK, "
                "EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL or PHYLIP format.");


    /* 3. Build hmmbuild command line */
    /* Command line is built in this order:
       i.  Application name.
       ii. HMMER 'options' (in order they appear in ACD file)
       iii.HMMER 'options' (that don't appear in ACD file)
       iv. HMMER & new parameters.
       */
    ajStrAssignS(&cmd, ajAcdGetpathC("hmmbuild"));

    /* Every option must be APPENDED to cmd; a non-appending print would
       discard the program path and the options already added. */
    if(prior)
        ajFmtPrintAppS(&cmd, " --prior %s ", ajFileGetNameC(prior));
    if(null)
        ajFmtPrintAppS(&cmd, " --null %s ", ajFileGetNameC(null));
    if(pam)
        ajFmtPrintAppS(&cmd, " --pam %s  --pamwgt %f ",
                       ajFileGetNameC(pam), pamwgt);
    ajFmtPrintAppS(&cmd, " -n %S ", nhmm);

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(strategy);
    if(option == 'F')
        ajStrAppendC(&cmd, " -f ");
    else if(option == 'G')
        ajStrAppendC(&cmd, " -g ");
    else if(option == 'S')
        ajStrAppendC(&cmd, " -s ");
    /* else go with default ('D' option in ACD file) */

    ajFmtPrintAppS(&cmd, " --pbswitch %d ", pbswitch);
    ajFmtPrintAppS(&cmd, " --archpri %f ", archpri);
    if(binary)
        ajStrAppendC(&cmd, " --binary ");
    /* --gapmax is only passed together with --fast */
    if(fast)
        ajFmtPrintAppS(&cmd, " --fast --gapmax %f ", gapmax);
    if(hand)
        ajStrAppendC(&cmd, " --hand ");
    ajFmtPrintAppS(&cmd, " --idlevel %f ", idlevel);
    if(noeff)
        ajStrAppendC(&cmd, " --noeff ");
    ajFmtPrintAppS(&cmd, " --swentry %f ", swentry);
    ajFmtPrintAppS(&cmd, " --swexit %f ", swexit);
    if(verbosity)
        ajStrAppendC(&cmd, " --verbose ");

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(weighting);
    if(option == 'B')
        ajStrAppendC(&cmd, " --wblosum ");
    else if(option == 'G')
        ajStrAppendC(&cmd, " --wgsc ");
    else if(option == 'K')
        ajStrAppendC(&cmd, " --wme ");
    else if(option == 'W')
        ajStrAppendC(&cmd, " --wpb ");
    else if(option == 'V')
        ajStrAppendC(&cmd, " --wvoronoi ");
    else if(option == 'N')
        ajStrAppendC(&cmd, " --wnone ");

    if(o)
        ajFmtPrintAppS(&cmd, " -o %s ", ajFileGetNameC(o));
    if(cfile)
        ajFmtPrintAppS(&cmd, " --cfile %s ", ajFileGetNameC(cfile));

    /* -A (append) always set but file will be wiped by EMBOSS first unless
       append: "Y" is set for "hmmfile" in the ACD file. */
    ajStrAppendC(&cmd, " -A -F ");
    ajFmtPrintAppS(&cmd, " %S %S", hmmfilename, ajSeqsetGetFilename(alignfile));


    /* 4. Close ACD files */
    ajSeqsetDel(&alignfile);
    ajFileClose(&prior);
    ajFileClose(&null);
    ajFileClose(&pam);
    ajFileClose(&hmmfile);
    ajFileClose(&o);
    ajFileClose(&cfile);


    /* 5. Call hmmbuild */
    ajFmtPrint("\n%S\n", cmd);
    ajSysExecS(cmd);


    /* 6. Exit cleanly */
    ajStrDel(&nhmm);
    ajStrDel(&cmd);
    ajStrDel(&fmt);
    ajStrDel(&hmmfilename);
    ajStrDel(&strategy);
    ajStrDel(&weighting);

    embExit();

    return 0;
}