Esempio n. 1
0
static AjBool dbxflat_ParseEmbl(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    
    line = ajStrNewC("");
    
    while(!ajStrPrefixC(line,"//"))
    {
	pos = ajFileResetPos(inf);
	
	if(!ajReadlineTrim(inf,&line))
	{
	    ajStrDel(&line);
	    return ajFalse;
	}
	if(ajStrPrefixC(line,"ID"))
	{
	    entry->fpos = pos;
	    ajFmtScanS(line,"%*S%S",&entry->id);
	    ajStrTrimEndC(&entry->id, ";");
/*
	    ++global;
	    printf("%d. %s\n",global,ajStrGetPtr(entry->id));
*/
	    if(entry->do_sv)
		embBtreeEmblSV(line,entry->sv);
	}


	if(entry->do_sv)
	    if(ajStrPrefixC(line,"SV") ||
	       ajStrPrefixC(line,"IV"))	/* emblcds database format */
		embBtreeEmblAC(line,entry->sv);

	if(entry->do_accession)
	    if(ajStrPrefixC(line,"AC") ||
	       ajStrPrefixC(line,"PA"))	/* emblcds database format */
		embBtreeEmblAC(line,entry->ac);
	
	if(entry->do_keyword)
	    if(ajStrPrefixC(line,"KW"))
		embBtreeEmblKW(line,entry->kw,entry->kwlen);

	if(entry->do_description)
	    if(ajStrPrefixC(line,"DE"))
		embBtreeEmblDE(line,entry->de,entry->delen);

	if(entry->do_taxonomy)
	    if(ajStrPrefixC(line,"OC") || ajStrPrefixC(line,"OS"))
		embBtreeEmblTX(line,entry->tx,entry->txlen);
    }
    

    ajStrDel(&line);
    
    return ajTrue;
}
Esempio n. 2
0
static AjBool dbxflat_ParseEmbl(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    
    line = ajStrNewC("");
    
    while(!ajStrPrefixC(line,"//"))
    {
	pos = ajFileResetPos(inf);
	
	if(!ajReadlineTrim(inf,&line))
	{
	    ajStrDel(&line);
	    return ajFalse;
	}
	if(ajStrPrefixC(line,"ID"))
	{
	    entry->fpos = pos;
	    ajFmtScanS(line,"%*S%S",&entry->id);
	    ajStrTrimEndC(&entry->id, ";");
/*
	    ++global;
	    printf("%d. %s\n",global,ajStrGetPtr(entry->id));
*/
	    if(svfield)
		embBtreeEmblSV(line,svfield->data);
	}


	if(svfield)
	    if(ajStrPrefixC(line,"SV") ||
	       ajStrPrefixC(line,"IV"))	/* emblcds database format */
		embBtreeEmblAC(line,svfield->data);

	if(accfield)
	    if(ajStrPrefixC(line,"AC") ||
	       ajStrPrefixC(line,"PA"))	/* emblcds database format */
		embBtreeEmblAC(line,accfield->data);
	
	if(keyfield)
	    if(ajStrPrefixC(line,"KW"))
		embBtreeEmblKW(line,keyfield->data,keyfield->len);

	if(desfield)
	    if(ajStrPrefixC(line,"DE"))
		embBtreeEmblDE(line,desfield->data,desfield->len);

	if(orgfield)
	    if(ajStrPrefixC(line,"OC") || ajStrPrefixC(line,"OS"))
		embBtreeEmblTX(line,orgfield->data,orgfield->len);
    }
    

    ajStrDel(&line);
    
    return ajTrue;
}
Esempio n. 3
0
static void acdrelations_readtypefile
            (AjPFile inf, 
	     PKtype *T)
{
    AjPStr     line    = NULL;
    PKtypedat  dattmp  = NULL;
    AjPList    datlist = NULL;
    
    if(!T)
        ajFatal("Null arg error 1 in acdrelations_readtypefile");
    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readtypefile");


    /* Allocate memory */
    line           = ajStrNew();
    datlist        = ajListNew();

    
    /* Read data from file */
    while(ajReadline(inf,&line))
    {
        /* Discard comment lines */
        if(ajStrPrefixC(line,"#")) 
            continue;

        /* Create object for holding line */
        dattmp = ajKtypedatNew();
        
        /* Tokenise line delimited by '|'
           Parse first token (value of knowntype: attribute) */
        ajStrAssignS(&dattmp->ktype, ajStrParseC(line, "|"));
        ajStrRemoveSetC(&dattmp->ktype, "_");
        ajStrRemoveWhite(&dattmp->ktype);
        
        /* Parse second token (ACD datatype) */
        ajStrAssignS(&dattmp->acdtype, ajStrParseC(NULL, "|"));

        /* Parse third token (EDAM relations: value ) */
        ajStrAssignS(&dattmp->edam, ajStrParseC(NULL, "|"));

        /* Push line onto list */
        ajListPushAppend(datlist, dattmp);
    }
    

    /* Write PKtype structure */
    ((*T)->n) = ajListToarray(datlist, (void***) &((*T)->dat));
  
    
    /* Free memory */
    ajStrDel(&line);
    ajListFree(&datlist);

    return;
}
Esempio n. 4
0
int main(int argc, char *argv[])
{
  embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3");

  AjPSeqall seqall;
  AjPSeq seq      = NULL;
  AjPStr inseq    = NULL;
  AjPStr gene     = NULL;
  AjPStr access   = NULL;
  AjBool accid    = ajTrue;
  AjPStr argument = NULL;
  AjPFile outfile = NULL;

  AjPStr seqid  = NULL;
  AjPStr restid = NULL;

  AjBool valid = ajFalse;
  AjBool isseq = ajFalse;
  AjBool isgbk = ajFalse;

  AjPFilebuff buff = NULL;
  AjPFile  tmpfile = NULL;
  AjPStr   tmpname = NULL;

  AjPStr regexstr = NULL;
  AjPStrTok token = NULL;
  AjPRegexp regex = NULL;

  AjPStr url  = NULL;
  AjPStr base = NULL;
  AjPStr head = NULL;
  AjPStr line = NULL;

  seqall   = ajAcdGetSeqall("sequence");
  access   = ajAcdGetString("access");
  gene     = ajAcdGetString("gene");
  argument = ajAcdGetString("argument");
  accid    = ajAcdGetBoolean("accid");
  outfile  = ajAcdGetOutfile("outfile");

  if(
     ajStrMatchC(access, "translation") ||
     ajStrMatchC(access, "get_exon") ||
     ajStrMatchC(access, "get_exons") ||
     ajStrMatchC(access, "get_cdsseq") ||
     ajStrMatchC(access, "get_gbkseq") ||
     ajStrMatchC(access, "get_geneseq") ||
     ajStrMatchC(access, "get_intron") ||
     ajStrMatchC(access, "getseq") ||
     ajStrMatchC(access, "seq") ||
     ajStrMatchC(access, "around_startcodon") ||
     ajStrMatchC(access, "around_stopcodon") ||
     ajStrMatchC(access, "before_startcodon") ||
     ajStrMatchC(access, "before_stopcodon") ||
     ajStrMatchC(access, "after_startcodon") ||
     ajStrMatchC(access, "after_stopcodon")
     )
    {
      isseq = ajTrue;
    }
  else if(ajStrMatchC(access, "annotate") ||
          ajStrMatchC(access, "output"))
    {
      isgbk = ajTrue;
    }
  else
    {
      ajFmtPrintF(outfile, "gene,%S\n", access);
    }

  base = ajStrNewC("rest.g-language.org");

  ajStrExchangeCC(&argument, " ", "/");
  ajStrExchangeCC(&argument, ",", "/");
  ajStrExchangeCC(&argument, "\t", "/");
  ajStrExchangeCC(&argument, "\r", "/");
  ajStrExchangeCC(&argument, "\n", "/");

  if(ajStrMatchC(gene, "*"))
    {
      ajStrInsertK(&gene, 0, '.');
    }

  if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::"))
    {
      ajStrExchangeCC(&gene, "@", "");
      ajStrExchangeCC(&gene, "list::", "");
      ajStrAssignS(&tmpname, gene);

      tmpfile = ajFileNewInNameS(tmpname);

      if(!tmpfile)
        {
          ajDie("List file (%S) open error\n", tmpname);
        }

      gene = ajStrNew();

      while(ajReadline(tmpfile, &line))
        {
          ajStrAppendS(&gene, line);
        }

      ajFileClose(&tmpfile);
      ajStrDel(&tmpname);
      ajStrDel(&line);
    }

  tmpname = ajStrNew();
  gAssignUniqueName(&tmpname);

  while(ajSeqallNext(seqall, &seq))
    {
      inseq = ajStrNew();

      if(!accid)
        {
          if(gFormatGenbank(seq, &inseq))
            {
              tmpfile = ajFileNewOutNameS(tmpname);

              if(!tmpfile)
                {
                  ajDie("Output file (%S) open error\n", tmpname);
                }

              ajFmtPrintF(tmpfile, "%S", inseq);

              ajFileClose(&tmpfile);

              ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);

              gFilePostSS(url, tmpname, &restid);

              ajStrDel(&url);

              ajSysFileUnlinkS(tmpname);
            }
          else
            {
              ajWarn("Sequence does not have features\n"
                     "Proceeding with sequence accession ID\n");
              accid = ajTrue;
            }
        }


      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      if(ajStrGetLen(seqid) == 0)
        {
          ajStrAssignS(&seqid, ajSeqGetNameS(seq));
        }

      if(ajStrGetLen(seqid) == 0)
        {
          ajWarn("No valid header information\n");
        }

      if(accid)
        {
          ajStrAssignS(&restid, seqid);
          if(ajStrGetLen(seqid) == 0)
            {
              ajDie("Cannot proceed without header with -accid\n");
            }

          if(!gValID(seqid))
            {
              ajDie("Invalid accession ID:%S, exiting\n", seqid);
            }
        }

      url = ajStrNew();

      if(isgbk)
        {
          ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access);
        }
      else
        {
          ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument);
        }

      if(!gFilebuffURLS(url, &buff))
        {
          ajDie("GET error from %S\n", url);
        }

      while(ajBuffreadLine(buff, &line))
        {
          if(isgbk){
            ajFmtPrintF(outfile, "%S", line);
            continue;
          }

          ajStrRemoveLastNewline(&line);

          regex = ajRegCompC("^>");

          if(ajRegExec(regex, line))
            {
              head = ajStrNew();

              ajStrAssignS(&head, line);
              ajStrTrimStartC(&head, ">");

              valid = ajFalse;

              token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n");

              while(ajStrTokenNextParse(token, &regexstr))
                {
                  if(ajStrGetLen(regexstr))
                    {
                      regex = ajRegComp(regexstr);

                      if(ajRegExec(regex, line))
                        {
                          valid = ajTrue;
                          if(ajStrIsAlnum(regexstr))
                            {
                              ajStrExchangeSC(&gene, regexstr, "");
                            }
                        }

                      ajRegFree(&regex);
                    }
                }
            }
          else
            {
              if(valid)
                {
                  if(isseq)
                    {
                      ajStrFmtWrap(&line, 60);
                      ajFmtPrintF(outfile, ">%S\n%S\n", head, line);
                    }
                  else
                    {
                      ajFmtPrintF(outfile, "%S,%S\n", head, line);
                    }

                  valid = ajFalse;
                }
            }
        }

      ajFileClose(&outfile);

      ajStrDel(&restid);
      ajStrDel(&seqid);
      ajStrDel(&inseq);
    }

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&access);
  ajStrDel(&gene);

  embExit();
}
Esempio n. 5
0
/* @prog ssematch ***********************************************************
**
** Searches a DCF file (domain classification file) for secondary structure 
** matches.
**
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variables declarations */
    AjPFile dcfin        = NULL; /* Domain classification file */
    AjPFile ssin         = NULL; /* Secondary structure input file*/
    AjPMatrixf matrix    = NULL; /* Substitution matrix */
    AjPFile out_ss       = NULL; /* For ss top matches*/
    AjPFile out_se       = NULL; /* For se top matches*/
    AjPFile outfile      = NULL; /* Output file*/
    AjPFile logf         = NULL; /* Log file */
    float gapopen_sss    = 0.0;  /* Gap insertion penalty */
    float gapopen_sse    = 0.0;
    float gapopen        = 0.0;
    float gapextend_sss  = 0.0;  /* Gap extension penalty */
    float gapextend_sse  = 0.0;
    float gapextend      = 0.0;
    ajint max_hits       = 0;    /* number of top alignments to display*/
    ajint mode           = 0;
    ajint x              = 0;

    AjPScop temp_scop    = NULL; /* scop object pointer*/

    AjPList  scop_list   = NULL; /* list of scop objects for entire domain
				    classification file */
     
    AjIList       iter   = NULL;
    AjPStr        msg    = NULL; /* Pointer to String used for messages */
    AjPStr        line   = NULL;
   
    AjPStr    qse        = NULL; /* query secondary structure elements*/
    AjPStr    qss        = NULL; /* query secondary structure (by residue)*/
    AjPSeq    q3se       = NULL; /* query secondary structure elements, 3-letter 
				    code*/
    AjPSeq    q3ss       = NULL; /* query secondary structure 
				    (by residue), 3-letter code*/
    AjPSeq    query      = NULL;
    




    /* Read data from acd */
    embInitPV("ssematch",argc,argv,"DOMAINATRIX",VERSION);
    dcfin       = ajAcdGetInfile("dcfinfile");
    ssin       = ajAcdGetInfile("ssinfile");
    max_hits      = ajAcdGetInt("maxhits");
    matrix        = ajAcdGetMatrixf("datafile");
    gapopen_sss   = ajAcdGetFloat("rgapopen");
    gapextend_sss = ajAcdGetFloat("rgapextend");
    gapopen_sse   = ajAcdGetFloat("egapopen");
    gapextend_sse = ajAcdGetFloat("egapextend");
    out_ss        = ajAcdGetOutfile("outssfile");
    out_se        = ajAcdGetOutfile("outsefile");
    logf       = ajAcdGetOutfile("logfile");






    /* Create list of scop objects for entire input domain classification file. */
    scop_list  = ajListNew();
    while((temp_scop = (ajScopReadCNew(dcfin, "*"))))
        ajListPushAppend(scop_list,temp_scop);


    /* Error handing if domain classification file was empty. */
    if(!(ajListGetLength(scop_list)))
    {
      
        ajWarn("Empty list from scop input file\n");
	ajFileClose(&dcfin);
	ajFileClose(&ssin);
	ajMatrixfDel(&matrix);
       	ajFileClose(&out_ss);
	ajFileClose(&out_se);
       	ajFileClose(&logf);
	while(ajListPop(scop_list, (void *) &temp_scop))
	    ajScopDel(&temp_scop);
	ajListFree(&scop_list);
        ajListIterDel(&iter);    
	
	ajExit();
	return 1;
    }

    /* Error handling in case of empty query file. */
    if(ssin == NULL)   
    {
        ajWarn("Empty secondary structure query file\n");
	ajFileClose(&dcfin);
	ajFileClose(&ssin);
	ajMatrixfDel(&matrix);
       	ajFileClose(&out_ss);
	ajFileClose(&out_se);
       	ajFileClose(&logf);
	while(ajListPop(scop_list, (void *) &temp_scop))
	    ajScopDel(&temp_scop);
	ajListFree(&scop_list);
        ajListIterDel(&iter);    
	
	ajExit();
	return 1; 
    }      
    
    /* Assign sequences in query file to sequence objects. */
    qse = ajStrNew();
    qss = ajStrNew();
    
    while(ajReadlineTrim(ssin,&line))
    {
        /* SE string */
        if(ajStrPrefixC(line,"SE"))
	{
	    ajFmtScanS(line, "%*s %S", &qse);
            /* Convert this string to 3-letter code & then convert to AjPSeq 
	       object. */
            q3se = ssematch_convertbases(qse);
        }
        /* SS string */
	else if(ajStrPrefixC(line,"SS"))
	{
            while((ajReadlineTrim(ssin,&line)) && !ajStrPrefixC(line,"XX"))
                ajStrAppendS(&qss,line);
            ajStrRemoveWhite(&qss);

            /* Convert this string to 3-letter code & then to AjPSeq object. */
            q3ss = ssematch_convertbases(qss);
      	}
    }



    /* For se & then for ss, modes 0 & 1. */
    for(mode = 0; mode <= 1; mode++)
    {
        /* Assign arguments for alignment function. */
        if (mode == 0) 
        {
            query = q3se;
            gapopen = gapopen_sse; 
            gapextend = gapextend_sse;
            outfile =  out_se;
        } 
        else if(mode == 1)
        {
            query = q3ss;  
            gapopen = gapopen_sss; 
            gapextend = gapextend_sss;
            outfile =  out_ss;   
        }


        /* Iterate through list of scop objects & calculate alignment scores. */
        iter=ajListIterNew(scop_list);
        while((temp_scop=(AjPScop)ajListIterGet(iter)))
        {
            /* The function extracts the se (mode 0) or ss (mode 1) subject 
	       sequences from the scop object, performs a Needleman-Wunsch 
	       global alignment with the query sequence & allocates the score 
	       to the Score element of the scop object*/ 

	    if(!(ssematch_NWScore(temp_scop , query, mode, matrix, gapopen, gapextend)))

	    {
                ajFmtPrintF(logf, "%-15s\n", "ALIGNMENT");
                ajFmtPrintF(logf, "Could not align sequence in scop domain %S\n ", 
			    temp_scop->Entry); 
	        ajFmtPrintS(&msg, "Could not align sequence in scop domain %S\n ", 
			    temp_scop->Entry);
	        ajWarn(ajStrGetPtr(msg));
                continue;
	    }
	}


        ajListIterDel(&iter);
      	temp_scop    = NULL;


        /* Sort list of Scop objects by Score */
        ajListSort(scop_list, ssematch_CompScoreInv);
	
	
        iter=ajListIterNew(scop_list);
        /* Write top-scoring hits to outfile. */
        for(x=0; x < max_hits; x++ )
	{
            temp_scop=(AjPScop)ajListIterGet(iter);
 
	    /* Print score to output file. */
	    ajFmtPrintF(outfile, "XX   ALIGNMENT SCORE %.3f\nXX\n", temp_scop->Score);
	    
	    /* Could also write alignment - later modification. */
	    if(!ajScopWrite(outfile, temp_scop))
		ajFatal("Could not write output file %S\n", outfile);

        
        }

	ajListIterDel(&iter); 
     	temp_scop    = NULL;
    }


      
    /* Memoryt management. */
    ajFileClose(&dcfin);
    ajFileClose(&ssin);
    ajMatrixfDel(&matrix);
    ajFileClose(&out_ss);
    ajFileClose(&out_se);
    ajFileClose(&logf);
    while(ajListPop(scop_list, (void *) &temp_scop))
	ajScopDel(&temp_scop);
    ajListFree(&scop_list);
    ajStrDel(&msg);
    ajStrDel(&line);
    ajStrDel(&qse); 
    ajStrDel(&qss);
    ajSeqDel(&q3se);
    ajSeqDel(&q3ss);
    

    ajExit();
    return 0;
}
Esempio n. 6
0
static void domainalign_ProcessStampFile(AjPStr in, 
					 AjPStr out,
					 AjPDomain domain, 
					 ajint noden, 
					 AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer.          */
    AjPFile   inf = NULL;  /* Input file pointer.           */
    AjPStr  temp1 = NULL;  /* Temporary string.             */
    AjPStr  temp2 = NULL;  /* Temporary string.             */
    AjPStr  temp3 = NULL;  /* Temporary string.             */
    AjPStr   line = NULL;  /* Line of text from input file. */
    ajint     blk = 1;     /* Count of the current block in the input file.
			      Block 1 is the numbering and protein sequences, 
			      Block 2 is the secondary structure, 
			      Block 3 is the Very/Less/Post similar records*/
    AjBool     ok = ajFalse;
    
    
    /* Initialise strings. */
    line    = ajStrNew();
    temp1    = ajStrNew();
    temp2    = ajStrNew();
    temp3    = ajStrNew();


    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
	ajFatal("Could not open input file in domainalign_ProcessStampFile");
    



    /* Start of code for reading input file. 
       Ignore everything up to first line beginning with 'Number'. */
    while((ajReadlineTrim(inf,&line)))
    {
	/* ajFileReadLine will trim the tailing \n. */
	if((ajStrGetCharPos(line, 1)=='\0'))
	{
	    ok = ajTrue;
	    break;
	}
    }
    
    
    
    /* Read rest of input file. */
    if(ok)
    {
	/* Write DOMAIN classification records to file. */
	if(!(outf=ajFileNewOutNameS(out)))
	 ajFatal("Could not open output file in domainalign_ProcessStampFile");

	
	if((domain->Type == ajSCOP))
	{
	    ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
	    ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
	    ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",
			    75," \t\n\r");
	    ajFmtPrintF(outf,"# XX\n");
	}
	else
	{
	    ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
	    ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
	    ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75,
			    " \t\n\r");
	    ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75,
			    " \t\n\r");
	    ajFmtPrintF(outf,"# XX\n");
	}
	if((domain->Type == ajSCOP))
	{
	    if(noden==1) 
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Scop->Sunid_Class);
	    else if(noden==2)
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Scop->Sunid_Fold);
	    else if(noden==3)
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Superfamily);
	    else if(noden==4) 	
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Family);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
	}
	else
	{
	    if(noden==5) 
		ajFmtPrintF(outf,"# SI   %d\n# XX", domain->Cath->Class_Id);
	    else if(noden==6)
		ajFmtPrintF(outf,"# SI   %d\n# XX", domain->Cath->Arch_Id);
	    else if(noden==7)
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Cath->Topology_Id);
	    else if(noden==8)
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Cath->Superfamily_Id);
	    else if(noden==9)  
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Cath->Family_Id);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
	}   



	while((ajReadlineTrim(inf,&line)))
	{
	    /* Increment counter for block of file. */
	    if((ajStrGetCharPos(line, 1)=='\0'))
	    {
		blk++;
		if(blk==4)
		    blk=1;
	    
		continue;
	    }



	    /* Block of numbering line and protein sequences. */
	    if(blk==1)
	    {
		/* Print the number line out as it is. */
		if(ajStrPrefixC(line,"Number"))
		    ajFmtPrintF(outf,"\n# %7s %S\n"," ", line);
		else
		{
		    /* Read only the 7 characters
		       of the domain identifier
		       code in. */
		    ajFmtScanS(line, "%S", &temp1);
		    ajStrAssignSubS(&temp2, temp1, 0, 6);


		    /* Read the sequence. */
		    ajStrAssignSubS(&temp3, line, 13, 69);
		    ajStrExchangeSetCC(&temp3, " ", "X");
		    ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
				"with 'X'\n");
		    ajStrFmtUpper(&temp3);
		

		    /* Write domain id code and sequence out. */
		    ajFmtPrintF(outf,"%-15S%7d %S%7d\n",
				temp2, 0, temp3, 0);
		}
	    }
	    /* Secondary structure filled with '????' (unwanted). */
	    else if(blk==2)
	    {
		continue;
	    }
	    /* Similarity lines. */
	    else
	    {
		if(ajStrPrefixC(line,"Post"))
		{
		    /* Read the sequence. */
		    ajStrAssignSubS(&temp3, line, 13, 69);

		    /* Write post similar line out. */
		    ajFmtPrintF(outf,"%-15s%7s %S\n","# Post_similar", " ",
				temp3);
		}
		/* Ignore Very and Less similar lines. */
		else continue;
	    }
	}
    }
    else /* ok == ajFalse. */
    {
	ajWarn("\n***********************************************\n"
	       "* STAMP was called but output file was EMPTY! *\n"
	       "*   NO OUTPUT FILE GENERATED FOR THIS NODE.   *\n"
	       "***********************************************\n");
	ajFmtPrintF(logf, "STAMP called but output file empty.  "
		    "No output file for this node!");
    }
    


    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);
    

    /* All done. */
    return;
}
Esempio n. 7
0
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment** 
** @param [r] logf [AjPFile] Log file
**
** @return [void] True on success
** @@
****************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in, 
					   AjPStr align, 
					   AjPDomain domain,
					   ajint noden, 
					   AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer. */
    AjPFile   inf = NULL;  /* Input file pointer. */
    AjPStr  temp1 = NULL;  /* Temporary string. */
    AjPStr  temp2 = NULL;  /* Temporary string. */
    AjPStr  temp3 = NULL;  /* Temporary string. */
    AjPStr   line = NULL;  /* Line of text from input file. */
    
    
    /* Initialise strings. */
    line    = ajStrNew();
    temp1   = ajStrNew();
    temp2   = ajStrNew();
    temp3   = ajStrNew();



    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");
    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");
    

    /*Write DOMAIN classification records to file*/
    if((domain->Type == ajSCOP))
    {
	ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
	ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
	ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",75," \t\n\r");
	ajFmtPrintF(outf,"# XX\n");
    }
    else
    {
	ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
	ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
	ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75," \t\n\r");
	ajFmtPrintF(outf,"# XX\n");
    }
    
    if((domain->Type == ajSCOP))
    {
	if(noden==1) 
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Class);
	else if(noden==2)
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Fold);
	else if(noden==3)
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Superfamily);
	else if(noden==4) 	
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Scop->Sunid_Family);
	else
	    ajFatal("Node number error in domainalign_ProcessStampFile");
    }	
    else
    {
	    if(noden==5) 
		ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Class_Id);
	    else if(noden==6)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Arch_Id);
	    else if(noden==7)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Topology_Id);
	    else if(noden==8)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Superfamily_Id);
	    else if(noden==9)  
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Family_Id);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
    }   


    
    /* Start of code for reading input file. */
    /*Ignore everything up to first line beginning with 'Number'*/
    while((ajReadlineTrim(inf,&line)))
        /* ajFileReadLine will trim the tailing \n. */
        if((ajStrGetCharPos(line, 1)=='\0'))
            break;

    
    /* Read rest of input file. */
    while((ajReadlineTrim(inf,&line)))
    {
      if((ajStrGetCharPos(line, 1)=='\0'))
        continue; 
        
       /* Print the number line out as it is. */
            else if(ajStrPrefixC(line,"CLUSTAL"))
              continue;
	    else if(ajStrPrefixC(line," "))
                ajFmtPrintF(outf,"\n");
        /* write out a block of protein sequences. */
        else
        {
               /* Read only the 7 characters of the domain identifier code in. */
               ajFmtScanS(line, "%S %S", &temp1,&temp3);
                 ajStrAssignSubS(&temp2, temp1, 0, 6);
  
  
         /* Read the sequence
                   ajStrAssignSubS(&temp3, line, 13, 69);
                   ajStrExchangeSetCC(&temp3, " ", "X");
                   ajStrFmtUpper(&temp3);*/
                
  
                   /* Write domain id code and sequence out. */
                   ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);              
        }
    }
    

    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);
    

    /* All done. */
    return;
}
Esempio n. 8
0
int main(ajint argc, char **argv)
{
    AjPList  ccfin        = NULL;  /* List of CCF (input) files.             */

    AjPDir   pdbin        = NULL;  /* Path of pdb input files.               */
    AjPStr   pdbprefix    = NULL;  /* Prefix of pdb input files.             */
    AjPStr   pdb_name     = NULL;  /* Full name (path/name/extension) of 
					 pdb format input file.              */

    AjPDirout ccfout     = NULL;   /* Path of coordinate output file.        */
    AjPStr   randomname  = NULL;   /* Name for temp file tempf.              */
    AjPStr   ccf_this    = NULL; 
    AjPStr   exec        = NULL; 
    AjPStr   naccess_str = NULL; 
    AjPStr   line        = NULL;
    AjPStr   syscmd      = NULL;   /* Command line arguments.                */
    AjPStr  *mode        = NULL;   /* Mode of operation from acd.            */

    AjPFile  errf        = NULL;   /* pdbplus error file pointer.            */
    AjPFile  serrf       = NULL;   /* stride error file pointer.             */
    AjPFile  nerrf       = NULL;   /* stride error file pointer.             */
    AjPFile  tempf       = NULL;   /* Temp file for holding STRIDE output.   */
    AjPFile  ccf_inf     = NULL;   /* Protein coordinate input file.         */
    AjPFile  ccf_outf    = NULL;   /* Protein coordinate output file.        */

    AjIList  iter        = NULL; 

    AjBool   done_naccess= ajFalse;
    AjBool   done_stride = ajFalse;
    AjBool   found       = ajFalse;
    AjPResidue temp_res  = NULL;  /* Pointer to Residue object.                */
    AjPPdb   pdb_old     = NULL;  /* Pointer to PDB object - without new
				     stride elements.                       */
    AjPPdb   pdb         = NULL;  /* Pointer to PDB object.                 */
    ajint    idn         = 0;     /* Chain identifier as a number (1,2,...) */
    ajint    chain_num   = 0;     /* Chain identifier index (0,1,...).      */
    ajint    tS          = 0;     /* User-defined threshold size for SSEs.  */
    ajint    nostride    = 0;     /* No. times stride failed                */
    ajint    nonaccess   = 0;     /* No. times naccess failed               */
    ajint    nofile      = 0;     /* No. times of file error                */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr   res      = NULL;  /* Residue id from STRIDE ASG line (ALA etc). */
    AjPStr   res_num  = NULL;  /* PDB residue number from STRIDE ASG line.   */
    char     pcid     = ' ';   /* Protein chain identifier from STRIDE or 
				  NACESS output (A,B, etc).                  */
    char     ss       = ' ';   /* One-letter secondary structure code from 
				  STRIDE ASG line.                           */
    float    ph       = 0.0;   /* Phi angle from STRIDE ASG line.            */
    float    ps       = 0.0;   /* Psi angle from STRIDE ASG line.            */
    float    sa       = 0.0;   /* Residue solvent accessible area from STRIDE 
				  ASG line.                                  */
    float    f1       = 0;
    float    f2       = 0;
    float    f3       = 0;
    float    f4       = 0;
    float    f5       = 0;
    float    f6       = 0;
    float    f7       = 0;
    float    f8       = 0;
    float    f9       = 0;
    float    f10      = 0;





    /* Allocate strings; this section is used for variables that are 
       allocated once only. */
    pdb_name       = ajStrNew();
    res            = ajStrNew();
    res_num        = ajStrNew();
    randomname     = ajStrNew();
    syscmd         = ajStrNew();
    line           = ajStrNew();  
    naccess_str    = ajStrNew();
    exec           = ajStrNew();





    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin        = ajAcdGetDirlist("ccfinpath");  
    pdbin        = ajAcdGetDirectory("pdbindir"); 
    pdbprefix    = ajAcdGetString("pdbprefix");
    ccfout       = ajAcdGetOutdir("ccfoutdir");
    mode         = ajAcdGetList("mode");
    errf         = ajAcdGetOutfile("logfile");
    if(ajStrGetCharFirst(*mode) != '2')
	serrf    = ajAcdGetOutfile("slogfile");
    if(ajStrGetCharFirst(*mode) != '1')
	nerrf    = ajAcdGetOutfile("nlogfile");
    tS           = ajAcdGetInt("thresholdsize");
 

    


    
    ajRandomSeed();
    ajFilenameSetTempname(&randomname); 




    
    /* 
     **  Start of main application loop. 
     **  Process each PDB/ protein coordinate file (EMBL format) in turn. 
     */ 
    
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file.  If it cannot be opened, write a 
           message to the error file, delete ccf_this and continue. */

        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)   
	{
	    ajWarn("%s%S\n//\n", 
		   "clean coordinate file not found: ", ccf_this);
	    
	    ajFmtPrintF(errf, "%s%S\n//\n", 
                        "clean coordinate file not found: ", ccf_this); 
            ajStrDel(&ccf_this); 
	    nofile++;
	    continue; 
        }       

        ajFmtPrint("Processing %S\n", ccf_this);
	fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into 
	   AjPPdb object.  ajPdbReadAllModelsNew will create the AjPPdb object. */
      if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
	    ajWarn("ERROR Clean coordinate file read" 
		   "error: %S\n//\n", ccf_this);
            ajFmtPrintF(errf, "ERROR Clean coordinate file read" 
			"error: %S\n//\n", ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this); 
	    nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old); 
        ajPdbDel(&pdb_old); 

        /* Construct name of corresponding PDB file.
	    NACCESS does *not* generate an output file if the path is './' e.g. 
	    naccess ./1rbp.ent , therefore replace './' with null. */
	ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));
	if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
	    ajStrAssignC(&pdb_name, "");
	
        ajStrAppendS(&pdb_name, pdbprefix);
	ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
	ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));
	

        /* Check corresponding PDB file exists for reading using ajFileStat. */
	if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this); 
            ajPdbDel(&pdb);
	    nofile++;
            continue;
        }
        
	if(ajStrGetCharFirst(*mode) != '2')
        {        
	    /* 
	     **  Create a string containing the STRIDE command line (it needs
	     **  PDB file name & name of temp output file).
	     **  Call STRIDE by using ajSystem.
	     */
	    
	    ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",  
			ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
	    ajFmtPrint("%S %S -f%S >> %s 2>&1\n",  
		       ajAcdGetpathC("stride"),
                       pdb_name, randomname,ajFileGetNameC(serrf));
	    system(ajStrGetPtr(syscmd));  

	    
	    /* Open the stride output file */
	    if (((tempf = ajFileNewInNameS(randomname)) == NULL))
	    {
		ajWarn("%s%S\n//\n", 
		       "no stride output for: ", pdb_name); 
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no stride output for: ", pdb_name); 
		nostride++;
		ajStrDel(&ccf_this);
		ajPdbDel(&pdb); 
		continue; 
	    } 
	    else
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "stride output for: ", pdb_name); 

	    
	    done_stride = ajFalse;

	    /* Parse STRIDE output from temp output file a line at a time. */
	    while(ajReadlineTrim(tempf,&line))
	    {       
		if(ajStrPrefixC(line,"ASG"))    
		{
		    ajFmtScanS(line, "%*S %S  %c %S %*d %c %*S %f %f %f %*S", 
			       &res, &pcid, &res_num, &ss, &ph, &ps, &sa);
                
		    /* 
		     **  Populate pdbplus object with the data from this parsed
		     **  line. This means first identifying the chain, then 
		     **  finding the residue. 
		     */
                
		    /* Determine the chain number. ajDmxPdbplusChain does not 
		       recognise '-', so change '-' to '.'  */
		    if (pcid == '-')
			pcid = '.'; 

		    /* Get chain number from the chain identifier. */
		    if(!ajPdbChnidToNum(pcid, pdb, &idn)) 
		    {
			ajWarn("Could not convert chain id %c to chain"
			       " number in pdb file %S\n//\n", 
			       pcid, pdb_name);
			ajFmtPrintF(errf, "Could not convert chain id %c "
				    "to chain number in pdb file %S\n//\n", 
				    pcid, pdb_name);
			continue;
		    }
                    
		    /* 
		     **  The chain number that will get written starts at 1, but
		     **  we want an index into an array which must start at 0, 
		     **  so subtract 1 from the chain number to get the index. 
		     */
		    chain_num = idn-1; 
                  
		    /* 
		     **   Iiterate through the list of residues in the Pdb object,
		     **   found switches to true when first residue corresponding 
		     **   to the line is found. 
		     */

		    /* iter = ajListIterNewread(pdb->Chains[chain_num]->Atoms); */
		    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
		    found = ajFalse; 

		    while((temp_res = (AjPResidue)ajListIterGet(iter)))
		    {
		        /* If we have found the residue we want */
			if((ajStrMatchS(res_num, temp_res->Pdb) && 
			    ajStrMatchS(res, temp_res->Id3)))
			{
                       	    done_stride = ajTrue;
			    found = ajTrue;
			    temp_res->eStrideType = ss;
			    temp_res->Phi  = ph;
			    temp_res->Psi  = ps;
			    temp_res->Area = sa;
			}                 
			/* If the matching residue has been processed
			   move on to next ASG line, next residue. */
			else if(found == ajTrue) 
			    break;	
			else 
			/* Matching residue not found yet. */       
			    continue;	
		    }
		    ajListIterDel(&iter);
		} /* End of if ASG loop. */ 
	    } /* End of while line loop. */
	    

	    if(done_stride)
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "stride data for: ", pdb_name); 
	    else
	      {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no stride data for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
		nostride++;
	      }


	    /* Close STRIDE temp file. & tidy up. */
	    ajFileClose(&tempf);

	    /* Remove temporary file (stride output file). */
	    ajFmtPrintS(&exec, "rm %S", randomname); 
	    ajSysSystem(exec); 
	    
	    /* 
	     **  Calculate element serial numbers (eStrideNum)& ammend residue
	     **  objects, count no's of elements and ammend chain object 
	     **  (numHelices, num Strands). 
	     */
	    pdbplus_sort(pdb, tS);
	}
	

	if(ajStrGetCharFirst(*mode) != '1')
        {        
	    /* 
	     **   Create a string containing the NACCESS command line (it needs
	     **   PDB file name & name of temp output file) & call NACCESS.
	     **   If e.g. /data/structure/pdbfred.ent was parsed and the program
	     **   was run from /stuff, then /stuff/fred.asa and /stuff/fred.rsa
	     **   would be written.  These must be deleted once parsed (only
	     **   use the .rsa file here). 
	     */
	    
	    ajFmtPrintS(&syscmd, "%S %S  >> %s 2>&1",  
			ajAcdGetpathC("naccess"), pdb_name, 
			ajFileGetNameC(nerrf));
	    ajFmtPrint("%S %S  >> %s 2>&1\n",  
		       ajAcdGetpathC("naccess"), pdb_name, 
		       ajFileGetNameC(nerrf));
	    system(ajStrGetPtr(syscmd));  


	    
	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".rsa");
	    
	    /* Open the NACCESS output file. */
	    if (((tempf = ajFileNewInNameS(naccess_str)) == NULL))
	    {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no naccess output for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
		nonaccess++;
		ajStrDel(&ccf_this);
		ajPdbDel(&pdb); 
		continue; 
	    }	 
	    else
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "naccess output for: ", pdb_name); 


	    done_naccess = ajFalse;
	    /* Parse NACCESS output from temp output file a line at a time. */	    
	    while(ajReadlineTrim(tempf,&line))
	    {       
		if(ajStrPrefixC(line,"RES"))    
		{
		    /* Read data from lines. */
		    if((pcid = line->Ptr[8]) == ' ')
		      ajFmtScanS(line, "%*S %S %S %f %f %f "
				 "%f %f %f %f %f %f %f", 
				 &res, &res_num, &f1, &f2, &f3, &f4, &f5, 
				 &f6, &f7, &f8, &f9, &f10);
		    else
		      ajFmtScanS(line, "%*S %S %*c %S %f %f "
				 "%f %f %f %f %f %f %f %f", 
				 &res, &res_num, &f1, &f2, &f3, &f4, &f5, 
				 &f6, &f7, &f8, &f9, &f10);

		    /* Identify the chain, then finding all the residues 
		       corresponding to the residue. */
                
		    /* Get the chain number from the chain identifier. */
		    if(!ajPdbChnidToNum(pcid, pdb, &idn))
		    {
                        ajWarn("Could not convert chain id %c to chain"
				    " number in pdb file %S\n//\n", 
			       pcid, pdb_name);	
			ajFmtPrintF(errf, "Could not convert chain id"
				    " %c to chain number in pdb file %S\n//\n",
				    pcid, pdb_name);
			continue;
		    }
                    

                  
		    /* 
		     **  Chain number will start at 1, but we want an index 
		     **  into an array which must start at 0, so subtract 1 
		     **  from the chain number to get the index.
		     */
		    chain_num = idn-1; 



		    /* 
		     **   Iiterate through the list of residues in the Pdb object,
		     **   temp_res is an AjPResidue used to point to the current
		     **   residue.
		     **   ajBool found switches to true when first residue 
		     **   corresponding to the line is found. 
		     */
		    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);

		    found = ajFalse; 
		    while((temp_res = (AjPResidue)ajListIterGet(iter)))
		    {
			/* If we have found the residue we want, write the residue 
			   object. */
			if((ajStrMatchS(res_num, temp_res->Pdb) && 
			    ajStrMatchS(res, temp_res->Id3)))
                        {
			    found = ajTrue;
			    done_naccess = ajTrue;
			    temp_res->all_abs  = f1;
			    temp_res->all_rel  = f2;
			    temp_res->side_abs = f3;
			    temp_res->side_rel = f4;
			    temp_res->main_abs = f5;
			    temp_res->main_rel = f6;
			    temp_res->npol_abs = f7;
			    temp_res->npol_rel = f8;
			    temp_res->pol_abs  = f9;
			    temp_res->pol_rel  = f10;

			}      
			/* If the matching residues have all been processed. 
			   move on to next ASG line, next residue. */
			else if(found == ajTrue) 
			    break;	
			else 
			    /* Matching residues not found yet, move on to next 
			       residue. */
			    continue;	 
		    }
		    ajListIterDel(&iter);
		} 
	    } 
	    
	    if(done_naccess)
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "naccess data for: ", pdb_name); 
	    else
	    {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no naccess data for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
		nonaccess++;
	    }

	    /* Remove temporary file (naccess output files). */
	    ajFileClose(&tempf);
	    
	    ajFmtPrintS(&exec, "rm %S", naccess_str); 
	    ajSysSystem(exec); 

	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".asa");
	    ajFmtPrintS(&exec, "rm %S", naccess_str);
	    ajSysSystem(exec); 

	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".log");
	    ajFmtPrintS(&exec, "rm %S", naccess_str);
	    ajSysSystem(exec); 
	}

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);
	
        
        /* Write AjPPdb object to the output file in clean format. */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {               
	    ajWarn("%s%S\n//\n","Could not write results file for: ", 
                        pdb->Pdb);  
	    
	    ajFmtPrintF(errf,"%s%S\n//\n","Could not write results file for ", 
                        pdb->Pdb);

        }	
	ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */


    ajFmtPrint("STRIDE  failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE  failures: %d\nNACCESS failures: %d\n",
		nostride, nonaccess);
    

    

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);
  
    ajFileClose(&errf);
    if(ajStrGetCharFirst(*mode) != '2')
	ajFileClose(&serrf);
    if(ajStrGetCharFirst(*mode) != '1')
	ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);




    
    ajExit();
    return 0;
} 
Esempio n. 9
0
/* @funcstatic seqwords_TermsRead *********************************************
 **
 ** Read the next Terms object from a file in embl-like format. The search 
 ** terms are modified with a leading and trailing space.
 **
 ** @param [r] inf  [AjPFile]   Input file stream
 ** @param [w] thys [AjPTerms*] Terms object
 **
 ** @return [AjBool] True on succcess
 ** @@
 *****************************************************************************/
static AjBool seqwords_TermsRead(AjPFile inf, 
				 AjPTerms *thys)
{    
    AjPStr   line           =NULL;	/* Line of text. */
    AjPStr   temp           =NULL;
    AjPList  list_terms     =NULL;	/* List of keywords for a scop node*/
    AjBool   ok             =ajFalse;
    AjPStr   type           = NULL;


    /* Memory management */
    (*thys)=seqwords_TermsNew();
    list_terms = ajListstrNew();
    line       = ajStrNew();
    type       = ajStrNew();
    
    /* Read first line. */
    ok = ajReadlineTrim(inf,&line);


    while(ok && !ajStrPrefixC(line,"//"))
    {
	if(ajStrPrefixC(line,"XX"))
	{
	    ok = ajReadlineTrim(inf,&line);
	    continue;
	}	
	else if(ajStrPrefixC(line,"TY"))
	{
	    ajFmtScanS(line, "%*s %S", &type);
	    
	    if(ajStrMatchC(type, "SCOP"))
		(*thys)->Type = ajSCOP;
	    else if(ajStrMatchC(type, "CATH"))
		(*thys)->Type = ajCATH;
	}
	else if(ajStrPrefixC(line,"CL"))
	{
	    ajStrAssignC(&(*thys)->Class,ajStrGetPtr(line)+3);
	    ajStrRemoveWhiteExcess(&(*thys)->Class);
	}
	else if(ajStrPrefixC(line,"AR"))
	{
	    ajStrAssignC(&(*thys)->Architecture,ajStrGetPtr(line)+3);
	    ajStrRemoveWhiteExcess(&(*thys)->Architecture);
	}
	else if(ajStrPrefixC(line,"TP"))
	{
	    ajStrAssignC(&(*thys)->Topology,ajStrGetPtr(line)+3);
	    ajStrRemoveWhiteExcess(&(*thys)->Topology);
	}
	else if(ajStrPrefixC(line,"FO"))
	{
	    ajStrAssignC(&(*thys)->Fold,ajStrGetPtr(line)+3);
	    while(ajReadlineTrim(inf,&line))
	    {
		if(ajStrPrefixC(line,"XX"))
		    break;
		ajStrAppendC(&(*thys)->Fold,ajStrGetPtr(line)+3);
	    }
	    ajStrRemoveWhiteExcess(&(*thys)->Fold);
	}
	else if(ajStrPrefixC(line,"SF"))
	{
	    ajStrAssignC(&(*thys)->Superfamily,ajStrGetPtr(line)+3);
	    while(ajReadlineTrim(inf,&line))
	    {
		if(ajStrPrefixC(line,"XX"))
		    break;
		ajStrAppendC(&(*thys)->Superfamily,ajStrGetPtr(line)+3);
	    }
	    ajStrRemoveWhiteExcess(&(*thys)->Superfamily);
	}
	else if(ajStrPrefixC(line,"FA"))
	{
	    ajStrAssignC(&(*thys)->Family,ajStrGetPtr(line)+3);
	    while(ajReadlineTrim(inf,&line))
	    {
		if(ajStrPrefixC(line,"XX"))
		    break;
		ajStrAppendC(&(*thys)->Family,ajStrGetPtr(line)+3);
	    }
	    ajStrRemoveWhiteExcess(&(*thys)->Family);
	}
	else if(ajStrPrefixC(line,"TE")) 
	{
	    /* Copy and clean up term. */
	    temp    = ajStrNew();
	    ajStrAssignC(&temp,ajStrGetPtr(line)+3);
	    ajStrRemoveWhiteExcess(&temp);

	    
	    /* Append a leading and trailing space to search term*/
	    ajStrAppendK(&temp, ' ');
	    ajStrInsertC(&temp, 0, " ");

	    
	    /* Add the current term to the list. */
	    ajListstrPush(list_terms,temp);		    
	}

	ok = ajReadlineTrim(inf,&line);
    }
    if(!ok)
    {
	/* Clean up. */
	ajListstrFree(&list_terms);
	ajStrDel(&line);
	
    
	/* Return. */
	return ajFalse;
    }
        
    
    /* Convert the AjPList of terms to array of AjPSeq's. */
    if(!((*thys)->N=ajListstrToarray((AjPList)list_terms,&(*thys)->Keywords)))
	ajWarn("Zero sized list of terms passed into seqwords_TermsRead");


    /* Clean up.  Free the list (not the nodes!). */
    ajListstrFree(&list_terms);
    ajStrDel(&line);
    ajStrDel(&type);
    
    return ajTrue;
} 
Esempio n. 10
0
AjPPatlistRegex ajPatlistRegexRead (const AjPStr patspec,
				    const AjPStr patname,
				    const AjPStr fmt,
				    ajuint type, AjBool upper, AjBool lower)
{
    AjPPatlistRegex patlist = NULL;
    AjPStr line = NULL;
    AjPStr pat  = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPStr patstr = NULL;
    ajuint ifmt;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "regex");

    ajStrAssignS(&patstr, patspec);

    patlist = ajPatlistRegexNewType(type);

    ifmt = patternRegexFormat(fmt);

    if(ajStrGetCharFirst(patspec) ==  '@')
    {
	ajStrCutStart(&patstr, 1);
	infile = ajFilebuffNewNameS(patstr);

	if(!infile)
        {
	    ajErr("Unable to open regular expression file '%S'", patstr);
	    return NULL;
	}

	line = ajStrNew();
	pat  = ajStrNew();
	name = ajStrNew();

	if(!ifmt)
	{
	    ajBuffreadLineTrim(infile,&line);

	    if(ajStrPrefixC(line, ">"))
		ifmt = 2;
	    else
		ifmt = 1;
	    ajFilebuffReset(infile);
	}
	
	switch(ifmt)
	{
	case 1:
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		npat++;
		ajStrAppendS (&pat,line);

		if(lower)
		    ajStrFmtLower(&pat);

		if(upper)
		    ajStrFmtUpper(&pat);

		ajFmtPrintS(&name, "%S%u", namestr, npat);
		ajPatternRegexNewList(patlist,name,pat);
		ajStrSetClear(&pat);
	    }
	    break;
	default:
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		if (ajStrFindC(line,">")>-1)
		{
		    npat++;

		    if (ajStrGetLen(name))
		    {
			if(lower)
			    ajStrFmtLower(&pat);

			if(upper)
			    ajStrFmtUpper(&pat);

			ajPatternRegexNewList(patlist,name,pat);
			ajStrSetClear(&name);
			ajStrSetClear(&pat);
		    }
		    ajStrCutStart(&line,1);
		    ajStrAssignS (&name,line);

		    if(!ajStrGetLen(name))
			ajFmtPrintS(&name, "%S%u", namestr, npat);
		}
		else
		    ajStrAppendS (&pat,line);
	    }

	    ajStrAssignEmptyS(&name, patname);
	    ajPatternRegexNewList(patlist,name,pat);
	    ajStrSetClear(&pat);
	    break;
	}

	ajFilebuffDel(&infile);
    }
    else
    {
	ajStrAssignS(&pat, patspec);

	if(lower)
	    ajStrFmtLower(&pat);

	if(upper)
	    ajStrFmtUpper(&pat);

	ajStrAssignS(&name, namestr);
	ajPatternRegexNewList(patlist,name,pat);
    }

    ajStrDel(&name);
    ajStrDel(&namestr);
    ajStrDel(&patstr);
    ajStrDel(&line);
    ajStrDel(&pat);

    return patlist;
}
Esempio n. 11
0
static AjBool dbxflat_ParseGenbank(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    AjBool ret = ajTrue;
    
    AjPStr sumline = NULL;
    
    line = ajStrNewC("");
    sumline = ajStrNew();

    
    while(!ajStrPrefixC(line,"//") && ret)
    {
	if(ajStrPrefixC(line,"LOCUS"))
	{
	    entry->fpos = pos;
	    ajFmtScanS(line,"%*S%S",&entry->id);
	}
	
	if(entry->do_sv)
	    if(ajStrPrefixC(line,"VERSION"))
		embBtreeGenBankAC(line,entry->sv);
	
	if(entry->do_accession)
	    if(ajStrPrefixC(line,"ACCESSION"))
		embBtreeGenBankAC(line,entry->ac);
	
	if(entry->do_keyword)
	    if(ajStrPrefixC(line,"KEYWORDS"))
	    {
		ajStrAssignS(&sumline,line);
		ret = ajReadlineTrim(inf,&line);
                while(ret && *MAJSTRGETPTR(line)==' ')
		{
		    ajStrAppendS(&sumline,line);
		    ret = ajReadlineTrim(inf,&line);
		}
		ajStrRemoveWhiteExcess(&sumline);
		embBtreeGenBankKW(sumline,entry->kw,entry->kwlen);
		continue;
	    }

	if(entry->do_description)
	    if(ajStrPrefixC(line,"DEFINITION"))
	    {
		ajStrAssignS(&sumline,line);
		ret = ajReadlineTrim(inf,&line);
		while(ret && *MAJSTRGETPTR(line)==' ')
		{
		    ajStrAppendS(&sumline,line);
		    ret = ajReadlineTrim(inf,&line);
		}
		ajStrRemoveWhiteExcess(&sumline);
		embBtreeGenBankDE(sumline,entry->de,entry->delen);
		continue;
	    }
	

	if(entry->do_taxonomy)
	    if(ajStrPrefixC(line,"SOURCE"))
	    {
		ret = ajReadlineTrim(inf,&line);
		ajStrAppendC(&line,";");
		while(ret && *MAJSTRGETPTR(line)==' ')
		{
		    ajStrAppendS(&sumline,line);
		    ret = ajReadlineTrim(inf,&line);
		}
		ajStrRemoveWhiteExcess(&sumline);
		embBtreeGenBankTX(sumline,entry->tx,entry->txlen);
		continue;
	    }
	

	pos = ajFileResetPos(inf);

	if(!ajReadlineTrim(inf,&line))
	    ret = ajFalse;
    }

    ajStrDel(&line);
    ajStrDel(&sumline);
    
    return ret;
}
Esempio n. 12
0
EmbPPropAmino* embPropEaminoRead(AjPFile mfptr)
{
    AjPStr  line  = NULL;
    AjPStr  token = NULL;
    
    AjBool firstline;
    
    const char *p;
    ajuint i;
    ajint  n;
    
    EmbPPropAmino *ret;

    line  = ajStrNew();
    token = ajStrNew();
    
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE);

    for(i=0; i < EMBPROPSIZE; ++i)
	AJNEW0(ret[i]);
    
    while(ajReadline(mfptr, &line))
    {
	ajStrRemoveWhiteExcess(&line);
	p = ajStrGetPtr(line);

	if(*p=='#' || *p=='!' || !*p)
	    continue;

	if(firstline)
	{
	    if(!ajStrPrefixC(line,"aa"))
		ajFatal("Incorrect (old?) format amino data file");

	    firstline = ajFalse;
	    continue;
	}

	ajFmtScanS(line,"%S",&token);
	ajStrFmtUpper(&token);

	if(ajStrGetLen(token) != 1)
	    ajFatal("Amino file line doesn't begin with a single character");

	i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

	if(i == 27)
	    ajFatal("Amino file line doesn't begin with a single A->Z (%S)",
		    line);

	n = ajFmtScanS(line,"%*s%d%d%d%d%d%d%f%d%d%d",
		       &ret[i]->tiny,
		       &ret[i]->sm_all,
		       &ret[i]->aliphatic,
		       &ret[i]->aromatic,
		       &ret[i]->nonpolar,
		       &ret[i]->polar,
		       &ret[i]->charge,
		       &ret[i]->pve,
		       &ret[i]->nve,
		       &ret[i]->extcoeff);
	if(n!= 10)
	    ajFatal("Only %d columns in amino file - expected %d",n+1,11);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
Esempio n. 13
0
EmbPPropMolwt* embPropEmolwtRead(AjPFile mfptr)
{
    AjPStr  line  = NULL;
    AjPStr  token = NULL;
    
    AjBool firstline;
    
    const char *p;
    ajuint i;
    ajint  n;
    
    EmbPPropMolwt *ret;

    line  = ajStrNew();
    token = ajStrNew();
    
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE+2);

    for(i=0; i < EMBPROPSIZE+2; ++i)
	AJNEW0(ret[i]);
    
    while(ajReadline(mfptr, &line))
    {
	ajStrRemoveWhiteExcess(&line);
	p = ajStrGetPtr(line);

	if(*p=='#' || *p=='!' || !*p)
	    continue;

	if(firstline)
	{
	    if(!ajStrPrefixC(line,"Mol"))
		ajFatal("Incorrect format molwt file: '%S'", line);

	    firstline = ajFalse;
	    continue;
	}

	ajFmtScanS(line,"%S",&token);
	ajStrFmtUpper(&token);

	if(ajStrGetLen(token) != 1)
	{
	    if(ajStrPrefixC(token,"HYDROGEN"))
	    {
		if(ajFmtScanS(line,"%*s%lf%lf",
			      &ret[EMBPROPHINDEX]->average,
			      &ret[EMBPROPHINDEX]->mono) != 2)
		    ajFatal("Bad format hydrogen data line");
	    }
	    else if(ajStrPrefixC(token,"OXYGEN"))
	    {
		if(ajFmtScanS(line,"%*s%lf%lf",
			      &ret[EMBPROPOINDEX]->average,
			      &ret[EMBPROPOINDEX]->mono) != 2)
		    ajFatal("Bad format oxygen data line");
	    }
	    else if(ajStrPrefixC(token,"WATER"))
	    {
		if(ajFmtScanS(line,"%*s%lf%lf",
			      &ret[EMBPROPWINDEX]->average,
			      &ret[EMBPROPWINDEX]->mono) != 2)
		    ajFatal("Bad format water data line");
	    }
	    else
		ajFatal("Unknown molwt token %S",token);

	    continue;
	}


	i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

	if(i == 27)
	    ajFatal("Molwt file line doesn't begin with a single A->Z (%S)",
		    line);

	n = ajFmtScanS(line,"%*s%lf%lf",
		       &ret[i]->average,
		       &ret[i]->mono);
	if(n != 2)
	    ajFatal("Only %d columns in amino file - expected %d",n,3);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
Esempio n. 14
0
/* @funcstatic acdrelations_writerelations ************************************
**
** Writes relations: attribute for an ACD data definition
** The relations: values given in knowntypes.standard have highest precedence,
** then the values given in edamtoacd.dat
** Attribute values for a given datatype in edamtoacd.dat are in order of
** increasing precedence, i.e. the last line is highest and the relations:
** value will be used if all conditions are met.
**
** @param [r] outf    [AjPFile] ACD output file
** @param [r] acdtype [AjPStr ] ACD datatype, e.g. "align"
** @param [r] strarr  [AjPStr*] All ACD attribute lines (whitespace removed)
**                              for the the current ACD data item (of type
**                              acdtype).  One line per array element.
** @param [r] n       [ajint]   Size of strarr
** @param [r] P       [PEdam]   edam object to write
** @param [r] T       [PKtype]  ktype object to read
** @return [void] 
** @@
******************************************************************************/
static void acdrelations_writerelations
            (AjPFile outf, 
	     AjPStr  acdtype,
	     AjPStr *strarr, 
             ajint   n,
	     PEdam   P,
    	     PKtype  T)
{
    ajint  i         = 0;
    ajint  j         = 0;
    ajint  k         = 0;
    ajint  nmatch    = 0;
    AjPStr relations = NULL;
    AjPStr ktype     = NULL;     /* Value of knowntype: attribute */
    AjBool done      = ajFalse;
    AjBool donetype  = ajFalse;
    AjPStr tmpstr    = NULL;
    
    
    if(!outf || !acdtype || !strarr || !n || !P)
        ajFatal("NULL args passed to acdrelations_writerelations");

    /* Memory allocation */
    relations = ajStrNew();
    ktype     = ajStrNew();
    tmpstr    = ajStrNew();

    
    /* Loop through all lines in edamtoacd.dat */
    for(i=0; i<P->n ;i++)
    {
        /* Found matching datatype */
        if(ajStrMatchS(acdtype, P->dat[i]->acdtype))
        {
            /* Copy first relations: string defined for this datatype (default) */
            ajStrAssignS(&relations, P->dat[i]->edam);
            done = ajTrue;
            i++;

            /* Check next line in edamtoacd.dat */
            for( ; i<P->n; i++)
            {
                /* Datatype still matches */
                if(ajStrMatchS(acdtype, P->dat[i]->acdtype))
                {
                    /* Loop through all required attributes for this datatype */
                    for(nmatch=0, j=0; j<P->dat[i]->n; j++)
                    {
                        /* Loop through all attribute lines for the data defininition */
                        for(k=0; k<n; k++)
                            if(ajStrMatchS(P->dat[i]->acdattr[j], strarr[k]))
                            {
                                nmatch++;
                                /* ajFmtPrint("Found match %d:  %S:%S\n", nmatch, P->dat[i]->acdattr[j], strarr[k]); */
                                
                                break;
                            }
                    }
                    /* All attribute values match */
                    if(nmatch == P->dat[i]->n)
                        ajStrAssignS(&relations, P->dat[i]->edam);
                    /* Should never happen */
                    else if (nmatch > P->dat[i]->n)
                        ajFatal("Terminal weirdness in acdrelations_writerelations");
                }
                else
                    break;
            }
            break;            
        }
    }

    /* Check for match of knowntype: attribute against knowntypes.standard.
       These have higher precedence than the rules defined in edamtoacd.dat */
    for(donetype=ajFalse, i=0; i<n; i++)
    {
        if(ajStrPrefixC(strarr[i], "knowntype:"))
        {
            
            for(j=0;j<T->n; j++)
            {
                /* No check is made on the "Type" column in knowntypes.standard
                   as these are not proper ACD datatype names
                   To check these add
                   if(ajStrMatchS(acdtype, T->dat[j]->acdtype)) */
                
                ajFmtPrintS(&tmpstr, "knowntype:\"%S\"", T->dat[j]->ktype);

                    if(ajStrMatchS(tmpstr, strarr[i]))
                    {
                        ajStrAssignS(&relations, T->dat[j]->edam);
                        donetype=ajTrue;
                        break;
                    }
            }
            if(donetype)
                break;
        }
        else
            continue;
    }
    
    
    if(!done)
        ajFatal("No matching datatype (%S) in acdrelations_writerelations", acdtype);
    
    
    /* Write relations: attribute line to file */
    ajFmtPrintF(outf, "    relations:%S\n", relations);


    /* Free memory */
    ajStrDel(&relations);
    ajStrDel(&ktype);
    ajStrDel(&tmpstr);
    
    return;
}
Esempio n. 15
0
static void acdrelations_readdatfile
            (AjPFile inf, 
	     PEdam *P)
{
  AjPStr  line           = NULL;
  const AjPStr  tok      = NULL;
  const AjPStr  subtok   = NULL;
  AjPStr  strtmp         = NULL;
  AjPList strlist        = NULL;

  AjPStr  acdtype        = NULL;
  AjPStr  relations      = NULL;

  PEdamdat dattmp        = NULL;
  AjPList  datlist       = NULL;
  
  if(!P)
    ajFatal("Null arg error 1 in acdrelations_readdatfile");
  if(!inf)
    ajFatal("Null arg error 3 in acdrelations_readdatfile");  


  /* Allocate memory */
  line      = ajStrNew();
  acdtype   = ajStrNew();
  relations = ajStrNew();
  datlist   = ajListNew();

  /* Read data from file */
  while(ajReadline(inf,&line))
    {
      /* Discard comment lines */
      if(ajStrPrefixC(line,"#")) 
	continue;

      
      
      /* Tokenise line, delimited by '|'.
         Parse first token (ACD datatype ) */
      ajStrAssignS(&acdtype, ajStrParseC(line, "|")); 
      
      /* Parse second token (EDAM relations: value ) */
      ajStrAssignS(&relations, ajStrParseC(NULL, "|")); 

      /* Parse third token (attribute:value strings block) */
      tok = ajStrParseC(NULL, "|");
        

      /* Create new string list */
      strlist = ajListstrNew();

      /* Tokenise third token itself into tokens delimited by ' ' (space)
         Parse tokens (individual attribute:value strings)*/
      if((subtok=ajStrParseC(tok, ";")))
      {
          strtmp = ajStrNew();
          ajStrAssignS(&strtmp, subtok);
          ajStrRemoveWhite(&strtmp);
          ajListstrPushAppend(strlist, strtmp);
              
          while((subtok=ajStrParseC(NULL, ";")))
          {
              strtmp = ajStrNew();
              ajStrAssignS(&strtmp, subtok);
              ajStrRemoveWhite(&strtmp);
              ajListstrPushAppend(strlist, strtmp);
          }
      }
      
      /* Write PEdamdat structure & push onto list */
      dattmp = ajEdamdatNew();
      ajStrRemoveWhite(&acdtype);
      ajStrAssignS(&dattmp->acdtype, acdtype);
      ajStrAssignS(&dattmp->edam, relations);
      dattmp->n = ajListstrToarray(strlist, &dattmp->acdattr);
      ajListPushAppend(datlist, dattmp);
      
      /* Clear nodes (but not strings) from string list */
      ajListstrFree(&strlist);
    }
  

  /* Write PEdam structure */
  ((*P)->n) = ajListToarray(datlist, (void***) &((*P)->dat));

  /* Free memory */
  ajStrDel(&line);
  ajStrDel(&acdtype);
  ajStrDel(&relations);
  ajListFree(&datlist);

  return;
}
Esempio n. 16
0
static void jaspextract_copyfiles(AjPStr directory)
{
    AjPStr matrixfile = NULL;
    AjPList flist     = NULL;
    
    AjPStr wild  = NULL;
    AjPStr entry = NULL;
    AjPStr bname = NULL;
    AjPStr line  = NULL;
    AjPStr dest  = NULL;

    const AjPStr datadir = NULL;
    
    ajuint preflen = 0;
    ajuint i       = 0;
    const char *p  = NULL;

    AjPFile inf   = NULL;
    AjPFile outf  = NULL;    
    
    matrixfile = ajStrNew();
    flist      = ajListNew();
    wild       = ajStrNewC("*.pfm");
    bname      = ajStrNew();
    line       = ajStrNew();
    dest       = ajStrNew();


    datadir = ajDatafileValuePath();
    if(!datadir)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");
    
    ajFmtPrintS(&matrixfile,"%S%s",directory,MATRIXFILE);

    if(!ajFilenameExistsRead(matrixfile))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found",directory);
    
    ajFilelistAddPathWild(flist, directory, wild);


    while(ajListPop(flist,(void **)&entry))
    {
        ajStrAssignS(&bname,entry);
        ajFilenameTrimPath(&bname);
        
        i = 0;

        while(Jprefix[i].Prefix)
        {
            if(!ajStrPrefixC(bname,Jprefix[i].Prefix))
            {
                ++i;
                continue;
            }

            preflen = strlen(Jprefix[i].Prefix);
            p = ajStrGetPtr(bname);
            if(p[preflen]>='0' && p[preflen]<='9')
                break;

            ++i;
        }

        if(!Jprefix[i].Prefix)
        {
            ajStrDel(&entry);
            continue;
        }


        ajFmtPrintS(&dest,"%S%s%c%S",datadir,Jprefix[i].Directory,SLASH_CHAR,
                    bname);

        outf = ajFileNewOutNameS(dest);
        if(!outf)
            ajFatal("Cannot open output file %S",dest);

        /* Avoid UNIX copy for portability */
        inf  = ajFileNewInNameS(entry);
        if(!inf)
            ajFatal("Cannot open input file: %S",entry);

        while(ajReadlineTrim(inf,&line))
            ajFmtPrintF(outf,"%S\n",line);

        ajFileClose(&inf);
        ajFileClose(&outf);
        
        ajStrDel(&entry);        

    }
    
    ajListFree(&flist);
    
    ajStrDel(&wild);
    ajStrDel(&dest);
    ajStrDel(&line);
    ajStrDel(&bname);
    ajStrDel(&matrixfile);

    return;
}
Esempio n. 17
0
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec,
				const AjPStr patname,
				const AjPStr fmt,
				AjBool protein, ajuint mismatches)
{
    AjPPatlistSeq patlist = NULL;
    AjPStr line = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPRegexp mismreg = NULL;
    AjPStr patstr = NULL;
    AjPStr pat = NULL;
    ajuint mismatch = 0;
    ajint ifmt = 0;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "pattern");

    ajStrAssignS(&patstr, patspec);

    patlist = ajPatlistSeqNewType(protein);

    ifmt = patternSeqFormat(fmt);

    ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' "
	    "protein: %B mismatches: %d\n",
	    patspec, patname, protein, mismatches);

    if(ajStrGetCharFirst(patstr) == '@')
    {
	ajStrCutStart(&patstr, 1);
	infile = ajFilebuffNewNameS(patstr);

	if(!infile)
	{
	    ajErr("Unable to open pattern file '%S'", patstr);

	    return NULL;
	}

	line = ajStrNew();
	name = ajStrNew();

	if(!ifmt)
	{
	    ajBuffreadLineTrim(infile,&line);

	    if(ajStrPrefixC(line, ">"))
		ifmt = 2;
	    else
		ifmt = 1;
	    ajFilebuffReset(infile);
	}
	
	switch(ifmt)
	{
	case 1:
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		npat++;
		ajStrAppendS (&pat,line);
		ajFmtPrintS(&name, "%S%u", namestr, npat);
		ajPatternSeqNewList(patlist,name,pat,mismatches);
		ajStrSetClear(&pat);
	    }
	    break;
	default:
	    mismreg = ajRegCompC("<mismatch=(\\d+)>");

	    while (ajBuffreadLineTrim(infile,&line))
	    {
		if (ajStrGetCharFirst(line) == '>')
		{
		    if (ajStrGetLen(name))
		    {
			ajPatternSeqNewList(patlist,name,pat,
					    mismatch);
			ajStrSetClear(&name);
			ajStrSetClear(&pat);
			mismatch=mismatches;
		    }

		    ajStrCutStart(&line,1);

		    if (ajRegExec(mismreg,line))
		    {
			ajRegSubI(mismreg,1,&name);
			ajStrToUint(name,&mismatch);
			ajStrTruncateLen(&line,ajRegOffset(mismreg));
			ajStrTrimWhiteEnd(&line);
		    }
		    ajStrAssignS (&name,line);
		    ajStrAssignEmptyS(&name, patname);
		}
		else
		    ajStrAppendS (&pat,line);
	    }

	    ajStrAssignEmptyS(&name, patname);
	    ajPatternSeqNewList(patlist,name,pat,mismatch);
	    ajRegFree(&mismreg);
	    break;
	}

	ajFilebuffDel(&infile);
    }
    else
    {
        ajStrAssignS(&name, namestr);
	ajPatternSeqNewList(patlist,name,patstr,mismatches);
    }

    ajStrDel(&name);
    ajStrDel(&line);
    ajStrDel(&pat);
    ajStrDel(&namestr);
    ajStrDel(&patstr);

    return patlist;
}
Esempio n. 18
0
static AjBool dbxflat_ParseGenbank(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    AjBool ret = ajTrue;
    
    AjPStr sumline = NULL;
    
    line = ajStrNewC("");
    sumline = ajStrNew();

    
    while(!ajStrPrefixC(line,"//") && ret)
    {
	if(ajStrPrefixC(line,"LOCUS"))
	{
	    entry->fpos = pos;
	    ajFmtScanS(line,"%*S%S",&entry->id);
	}
	
	if(svfield)
	    if(ajStrPrefixC(line,"VERSION"))
		embBtreeGenBankAC(line,svfield->data);
	
	if(accfield)
	    if(ajStrPrefixC(line,"ACCESSION"))
		embBtreeGenBankAC(line,accfield->data);
	
	if(keyfield)
	    if(ajStrPrefixC(line,"KEYWORDS"))
	    {
		ajStrAssignS(&sumline,line);
		ret = ajReadlineTrim(inf,&line);
                while(ret && *MAJSTRGETPTR(line)==' ')
		{
		    ajStrAppendS(&sumline,line);
		    ret = ajReadlineTrim(inf,&line);
		}
		ajStrRemoveWhiteExcess(&sumline);
		embBtreeGenBankKW(sumline,keyfield->data,keyfield->len);
		continue;
	    }

	if(desfield)
	    if(ajStrPrefixC(line,"DEFINITION"))
	    {
		ajStrAssignS(&sumline,line);
		ret = ajReadlineTrim(inf,&line);
		while(ret && *MAJSTRGETPTR(line)==' ')
		{
		    ajStrAppendS(&sumline,line);
		    ret = ajReadlineTrim(inf,&line);
		}
		ajStrRemoveWhiteExcess(&sumline);
		embBtreeGenBankDE(sumline,desfield->data,desfield->len);
		continue;
	    }
	

	if(orgfield)
	    if(ajStrPrefixC(line,"SOURCE"))
	    {
		ret = ajReadlineTrim(inf,&line);
		ajStrAppendC(&line,";");
		while(ret && *MAJSTRGETPTR(line)==' ')
		{
		    ajStrAppendS(&sumline,line);
		    ret = ajReadlineTrim(inf,&line);
		}
		ajStrRemoveWhiteExcess(&sumline);
		embBtreeGenBankTX(sumline,orgfield->data,orgfield->len);
		continue;
	    }
	

	pos = ajFileResetPos(inf);

	if(!ajReadlineTrim(inf,&line))
	    ret = ajFalse;
    }

    ajStrDel(&line);
    ajStrDel(&sumline);
    
    return ret;
}
Esempio n. 19
0
int main(int argc, char **argv)
{

    AjPFile inf	 = NULL;
    AjPFile inf2 = NULL;
    AjPFeattable tab = NULL;
    AjPReport report = NULL;

    AjPSeq sequence = NULL;

    AjPStr redatanew = NULL;
    AjPStr str	     = NULL;
    AjPStr regexp    = NULL;
    AjPStr temp	     = NULL;
    AjPStr text      = NULL;
    AjPStr docdata   = NULL;
    AjPStr data      = NULL;
    AjPStr accession = NULL;
    AjPStr name      = NULL;
    EmbPPatMatch match = NULL;
    AjPStr savereg = NULL;
    AjPStr fthit   = NULL;

    AjBool full;
    AjBool prune;

    ajint i;
    ajint number;
    ajint start;
    ajint end;
    ajint length;
    ajint zstart;
    ajint zend;
    const char *p;
    ajint seqlength;
    AjPStr tmpstr  = NULL;
    AjPStr tailstr = NULL;
    AjPFeature gf;

    embInit("patmatmotifs", argc, argv);

    ajStrAssignC(&fthit, "SO:0001067");

    savereg   = ajStrNew();
    str       = ajStrNew();
    regexp    = ajStrNew();
    temp      = ajStrNew();
    data      = ajStrNew();
    accession = ajStrNew();
    text      = ajStrNew();
    name      = ajStrNew();

    sequence = ajAcdGetSeq("sequence");
    report   = ajAcdGetReport("outfile");
    full     = ajAcdGetBoolean("full");
    prune    = ajAcdGetBoolean("prune");

    ajSeqFmtUpper(sequence);		/* prosite regexs are all upper case */
    tab = ajFeattableNewSeq(sequence);
    ajStrAssignC(&tailstr, "");

    seqlength = ajStrGetLen(str);
    str       = ajSeqGetSeqCopyS(sequence);

    redatanew = ajStrNewC("PROSITE/prosite.lines");
    docdata   = ajStrNewC("PROSITE/");

    inf = ajDatafileNewInNameS(redatanew);
    if(!inf)
	ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running");

    ajFmtPrintAppS(&tmpstr, "Full: %B\n", full);
    ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune);
    ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf);
    ajReportSetHeaderS(report, tmpstr);

    while(ajReadlineTrim(inf, &regexp))
    {
	p=ajStrGetPtr(regexp);
	if(*p && *p!=' ' && *p!='^')
	{
	    p=ajSysFuncStrtok(p," ");
	    ajStrAssignC(&name,p);
	    if(prune)
		if(ajStrMatchCaseC(name,"myristyl") ||
		   ajStrMatchCaseC(name,"asn_glycosylation") ||
		   ajStrMatchCaseC(name,"camp_phospho_site") ||
		   ajStrMatchCaseC(name,"pkc_phospho_site") ||
		   ajStrMatchCaseC(name,"ck2_phospho_site") ||
		   ajStrMatchCaseC(name,"tyr_phospho_site"))
		{
		    for(i=0;i<4;++i)
			ajReadlineTrim(inf, &regexp);
		    continue;
		}
	    p=ajSysFuncStrtok(NULL," ");
	    ajStrAssignC(&accession,p);
	}

	if(ajStrPrefixC(regexp, "^"))
	{
	    p = ajStrGetPtr(regexp);

	    ajStrAssignC(&temp,p+1);
	    ajStrAssignC(&savereg,p+1);

	    match = embPatMatchFind(temp, str, ajFalse, ajFalse);
	    number = embPatMatchGetNumber(match);

	    for(i=0; i<number; i++)
	    {
		seqlength = ajStrGetLen(str);

		start = 1+embPatMatchGetStart(match, i);

		end = 1+embPatMatchGetEnd(match, i);

		length = embPatMatchGetLen(match, i);

		gf = ajFeatNew(tab, NULL, fthit, start, end,
			       (float) length, ' ', 0);

		ajFmtPrintS(&tmpstr, "*motif %S", name);
		ajFeatTagAddSS(gf, NULL, tmpstr);

		if(start-5<0)
		    zstart = 0;
		else
		    zstart = start-5;

		if(end+5> seqlength)
		    zend = end;
		else
		    zend = end+5;


		ajStrAssignSubS(&temp, str, zstart, zend);
	    }


	    if(full && number)
	    {
		ajStrAssignC(&redatanew,ajStrGetPtr(docdata));
		ajStrAppendC(&redatanew,ajStrGetPtr(accession));
		inf2 = ajDatafileNewInNameS(redatanew);
		if(!inf2)
		    continue;

		/*
		** Insert Prosite documentation from files made by
		** prosextract.c
		*/
		ajFmtPrintAppS(&tailstr, "Motif: %S\n", name);
		ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number);
		while(ajReadlineTrim(inf2, &text))
		    ajFmtPrintAppS(&tailstr, "%S\n", text);

		ajFmtPrintAppS(&tailstr, "\n***************\n\n");
		ajFileClose(&inf2);

	    }
	    embPatMatchDel(&match);
	}
    }

    ajReportSetTailS(report,tailstr);
    ajReportWrite(report, tab, sequence);

    ajReportDel(&report);
    ajFeattableDel(&tab);

    ajStrDel(&temp);
    ajStrDel(&regexp);
    ajStrDel(&savereg);
    ajStrDel(&str);
    ajStrDel(&data);
    ajStrDel(&docdata);
    ajStrDel(&text);
    ajStrDel(&redatanew);
    ajStrDel(&accession);
    ajSeqDel(&sequence);
    ajStrDel(&tailstr);
    ajStrDel(&fthit);
    ajStrDel(&name);
    ajStrDel(&tmpstr);

    ajFeattableDel(&tab);
    ajFileClose(&inf);

    embExit();

    return 0;
}
Esempio n. 20
0
static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    ajuint seqlen = 0;
    ajuint qlen = 0;
    AjPStr tmpfd  = NULL;
    AjPStr str = NULL;
    AjPStr de = NULL;
    AjBool ok;

    if(!dbxflat_wrdexp)
	dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)");

    line = ajStrNewC("");
    
    pos = ajFileResetPos(inf);

    if(!ajReadlineTrim(inf,&line))
    {
        ajStrDel(&line);
        return ajFalse;
    }

    /* first line of entry */

    if(!ajStrPrefixC(line,"@"))
        return ajFalse;

    entry->fpos = pos;
    ajStrCutStart(&line, 1);
    ajStrExtractFirst(line, &de, &entry->id);

    if(desfield && ajStrGetLen(de))
    {
	while(ajRegExec(dbxflat_wrdexp,de))
	{
	    ajRegSubI(dbxflat_wrdexp, 1, &tmpfd);
	    str = ajStrNew();
	    ajStrAssignS(&str,tmpfd);
	    ajListPush(desfield->data,(void *)str);
	    ajRegPost(dbxflat_wrdexp, &de);
	}
    }

/* now read sequence */
    ok = ajReadlineTrim(inf,&line);
    while(ok && !ajStrPrefixC(line, "+"))
    {
        ajStrRemoveWhite(&line);
        seqlen += MAJSTRGETLEN(line);
        ok = ajReadlineTrim(inf,&line);
    }

    if(!ok)
        return ajFalse;

    ok = ajReadlineTrim(inf,&line);
    while(ok)
    {
        qlen += MAJSTRGETLEN(line);
        if(qlen < seqlen)
            ok = ajReadlineTrim(inf,&line);
        else
            ok = ajFalse;
    }

    ajStrDel(&de);
    ajStrDel(&tmpfd);
    ajStrDel(&line);
    
    return ajTrue;
}
Esempio n. 21
0
/* @funcstatic seqwords_keysearch  ********************************************
**
** Search swissprot with terms structure and writes a hitlist structure
**
** @param [r] inf   [AjPFile]     File pointer to swissprot database
** @param [r] terms [AjPTerms]    Terms object pointer
** @param [w] hits  [EmbPHitlist*] Hitlist object pointer
**
** @return [AjBool] True on success
** @@
******************************************************************************/
static AjBool seqwords_keysearch(AjPFile inf, 
				 AjPTerms terms,
				 EmbPHitlist *hits)
{
    AjPStr   line           =NULL;	/* Line of text. */
    AjPStr   id             =NULL;	/* Line of text. */
    AjPStr   temp           =NULL;
    ajint    s              =0;         /* Temp. start of hit value. */
    ajint    e              =0;         /* Temp. end of hit value. */
    AjPInt   start          =NULL;      /* Array of start of hit(s). */
    AjPInt   end            =NULL;      /* Array of end of hit(s). */
    ajint    nhits          =0;         /* Number of hits. */
    ajint    x              =0;         
    AjBool   foundkw        =ajFalse;
    AjBool   foundft        =ajFalse;


    /* Check for valid args. */
    if(!inf)
	return ajFalse;
    

    

    /* Allocate strings and arrays. */
    line       = ajStrNew();
    id         = ajStrNew();
    start      = ajIntNew();
    end        = ajIntNew();



    /* Start of main loop. */
    while((ajReadlineTrim(inf,&line)))
    {
	/* Parse the AC line. */
	if(ajStrPrefixC(line,"AC"))
	{
	    /* Copy accesion number and remove the ';' from the end. */
	    ajFmtScanS(line, "%*s %S", &id);
	    ajStrExchangeCC(&id, ";", "\0");
	    
	    
	    /* Reset flags & no. hits. */
	    foundkw=ajFalse;
	    foundft=ajFalse;
	    nhits=0;
	}
	
	
	/* Search the description and keyword lines with search terms. */
	else if((ajStrPrefixC(line,"DE") || (ajStrPrefixC(line,"KW"))))
	{
	    /* 
	     ** Search terms have a leading and trailing space to prevent 
	     ** them being found as substrings within other words.  To 
	     ** catch cases where a DE or KW line begins with a search 
	     ** term, we must add a leading and trailing space to line.
	     ** We must first remove punctation from the line to be parsed.
	     */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    ajStrInsertC(&line, 0, " ");


	    for (x = 0; x < terms->N; x++)
		/* Search term is found. */
		if((ajStrFindCaseS(line, terms->Keywords[x])!=-1))
		{	
		    foundkw=ajTrue;
		    break;
		}
	}

	
	/* Search the feature table line with search terms. */
	else if((ajStrPrefixC(line,"FT   DOMAIN")))
	{
	    /*	
	     ** Search terms have a leading and trailing space to prevent 
	     ** them being found as substrings within other words.  To 
	     ** catch cases where a FT line ends with a search 
	     ** term, we must add a  trailing space to line 
	     ** We must first remove punctation from the line to be parsed.
	     */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    

	    for (x = 0; x < terms->N; x++)
		if((ajStrFindCaseS(line, terms->Keywords[x])!=-1))
		{
		    /* Search term is found. */
		    foundft = ajTrue;
		    nhits++;
		    
		    /* Assign start and end of hit. */
		    ajFmtScanS(line, "%*s %*s %d %d", &s, &e);


		    ajIntPut(&start, nhits-1, s);
		    ajIntPut(&end, nhits-1, e);
		    break;
		}
	}
	

	/* Parse the sequence. */
	else if((ajStrPrefixC(line,"SQ") && ((foundkw == ajTrue) ||
					     (foundft == ajTrue))))
	{
	    /* Allocate memory for temp. sequence. */
	    temp       = ajStrNew();


	    /* Read the sequence into hitlist structure. */
	    while((ajReadlineTrim(inf,&line)) && !ajStrPrefixC(line,"//"))
		/* Read sequence line into temp. */
		ajStrAppendC(&temp,ajStrGetPtr(line)+3);
 

	    /* Clean up temp. sequence. */
	    ajStrRemoveWhite(&temp);


	    /*Priority is given to domain (rather than full length) sequence.*/
	    if(foundft)
	    {
		for(x=0;x<nhits;x++)
		{
		    /* Increment counter of hits for subsequent hits*/
		    (*hits)->N++;

		    
		    /* Reallocate memory for array of hits in hitlist
                       structure. */
		    AJCRESIZE((*hits)->hits, (*hits)->N);
		    (*hits)->hits[(*hits)->N-1]=embHitNew();
		    ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model,
				 "KEYWORD");
		    

		    /* Assign start and end of hit. */
		    (*hits)->hits[(*hits)->N-1]->Start = ajIntGet(start, x);
		    (*hits)->hits[(*hits)->N-1]->End = ajIntGet(end, x);
				

		    /* Extract sequence within specified range */
		    ajStrAssignSubS(&(*hits)->hits[(*hits)->N - 1]->Seq, temp, 
				(*hits)->hits[(*hits)->N - 1]->Start - 1, 
				(*hits)->hits[(*hits)->N - 1]->End - 1);
		    

		    /* Put id into structure */
		    ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id);
		}
	    }
	    else
	    {
		/* Increment counter of hits */
		(*hits)->N++;

		    
		/* Reallocate memory for array of hits in hitlist structure */
		AJCRESIZE((*hits)->hits, (*hits)->N);
		(*hits)->hits[(*hits)->N-1]=embHitNew();
		ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model, "KEYWORD");

		/* Extract whole sequence */
		ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Seq, temp); 
		(*hits)->hits[(*hits)->N - 1]->Start = 1; 
		(*hits)->hits[(*hits)->N - 1]->End =
		    ajStrGetLen((*hits)->hits[(*hits)->N - 1]->Seq); 


		/* Put id into structure */
		ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id);
	    }

	    /* Free temp. sequence */
	    ajStrDel(&temp);
	}
    }


    /* Clean up */
    ajStrDel(&line);
    ajStrDel(&id);
    ajIntDel(&start);
    ajIntDel(&end);

    return ajTrue;
}
Esempio n. 22
0
int main(int argc, char **argv)
{
    AjPFile infdat = NULL;
    AjPFile infdoc = NULL;
    AjPFile outf   = NULL;
    AjPFile outs   = NULL;

    AjBool  haspattern;

    const char   *p;


    AjPStr line  = NULL;
    AjPStr text  = NULL;
    AjPStr dirname  = NULL;
    AjPStr filename = NULL;
    AjPStr id    = NULL;
    AjPStr ac    = NULL;
    AjPStr de    = NULL;
    AjPStr pa    = NULL;
    AjPStr ps    = NULL;
    AjPStr fn    = NULL;
    AjPStr re    = NULL;
    AjPStr fname = NULL;
    AjBool flag;
    AjBool isopen;
    AjBool goback;

    ajlong storepos = 0L;


    embInit("prosextract", argc, argv);

    dirname = ajAcdGetDirectoryName("prositedir");

    line = ajStrNew();
    text = ajStrNew();

    id = ajStrNew();
    ac = ajStrNew();
    de = ajStrNew();
    pa = ajStrNew();
    ps = ajStrNew();



    fn=ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.dat");
    if(!(infdat=ajFileNewInNameS(fn)))
	ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);



    fn=ajStrNewC("PROSITE/prosite.lines");
    outf = ajDatafileNewOutNameS(fn);
    ajStrDel(&fn);



    haspattern = ajFalse;

    while(ajReadlineTrim(infdat, &line) )
    {
	if(ajStrPrefixC(line, "ID"))
	{
	    if(ajStrSuffixC(line,"PATTERN."))
	    {
		haspattern = ajTrue;
		/*save id*/
		p = ajStrGetPtr(line);
		p = ajSysFuncStrtok(p," \t;");
		p = ajSysFuncStrtok(NULL," \t;");
		ajStrAssignC(&id,p);
		ajFmtPrintF(outf, "%S ", id);
		continue;
	    }
	    else
	    {
		haspattern = ajFalse;
		continue;
	    }
	}

	if(!haspattern)
	    continue;


	if(ajStrPrefixC(line, "AC") )
	{
	    p = ajStrGetPtr(line);
	    p = ajSysFuncStrtok(p, " \t;");
	    p = ajSysFuncStrtok(NULL, " \t;");
	    ajStrAssignC(&ac,p);
	    ajFmtPrintF(outf, "%S\n ", ac);
	    continue;
	}

    	if(ajStrPrefixC(line, "DE") )
	{
	    p = ajStrGetPtr(line);
	    p = ajSysFuncStrtok(p, " \t.");
	    p = ajSysFuncStrtok(NULL, " \t.");
	    ajStrAssignC(&de,p);
	    ajFmtPrintF(outf, "%S\n ", de);
	    continue;
	}


	if(ajStrPrefixC(line, "PA"))
	{
	    ajStrAssignC(&pa,"");

	    while(ajStrPrefixC(line,"PA"))
	    {
		p = ajStrGetPtr(line);
		p = ajSysFuncStrtok(p, " \t.");
		p = ajSysFuncStrtok(NULL, " \t.");
		ajStrAppendC(&pa,p);
		ajReadlineTrim(infdat, &line);
	    }

	    ajFmtPrintF(outf, "%S\n", pa);
	    re = embPatPrositeToRegExp(pa);
	    ajFmtPrintF(outf, "^%S\n\n", re);
	    ajStrDel(&re);
	    continue;
	}
    }


  /* Finished processing prosite.dat so look at prosite.doc */


    fn = ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.doc");
    if(!(infdoc=ajFileNewInNameS(fn)))
	ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);



    fname  = ajStrNewC("PROSITE/");
    flag   = ajFalse;
    isopen = ajFalse;
    goback = ajFalse;


    while(ajReadlineTrim(infdoc, &text))
    {
	if(ajStrPrefixC(text, "{PS") && isopen && !goback)
	    goback = ajTrue;

	if(ajStrPrefixC(text, "{PS") && !isopen)
	{
	    storepos = ajFileResetPos(infdoc);
	    /* save out the documentation text to acc numbered outfiles . */
	    p = ajStrGetPtr(text)+1;
	    p = ajSysFuncStrtok(p, ";");
	    ajStrAssignS(&filename, fname);
	    ajStrAppendC(&filename, p);

	    outs = ajDatafileNewOutNameS(filename);
	    flag   = ajTrue;
	    isopen = ajTrue;
	    continue;
	}


	if(ajStrPrefixC(text, "{BEGIN}") && flag)
	{
	    while(ajReadlineTrim(infdoc, &text))
	    {
		if(ajStrPrefixC(text,"{END}"))
		    break;

		ajFmtPrintF(outs, "%S\n", text);
	    }
	    ajFileClose(&outs);
	    isopen = ajFalse;

	    if(goback)
	    {
		goback = ajFalse;
		ajFileSeek(infdoc,storepos,0);
	    }

	}
    }

    ajStrDel(&line);
    ajStrDel(&text);
    ajStrDel(&dirname);
    ajStrDel(&filename);

    ajStrDel(&id);
    ajStrDel(&ac);
    ajStrDel(&de);
    ajStrDel(&pa);
    ajStrDel(&re);
    ajStrDel(&ps);
    ajStrDel(&fname);


    ajFileClose(&infdat);
    ajFileClose(&infdoc);
    ajFileClose(&outf);

    embExit();

    return 0;
}