コード例 #1
0
AjBool gFormatGenbank(AjPSeq seq, AjPStr *inseq){
  AjPSeqout     seqout   = NULL;
  AjPFeattabOut featout  = NULL;
  AjPFeattable  feat     = NULL;
  AjPStr        seqline  = NULL;
  AjPStr        featline = NULL;
  AjPFile       seqfile  = NULL;
  AjPFile       featfile = NULL;
  AjPStr        filename = NULL;

  gAssignUniqueName(&filename);
  feat = ajSeqGetFeatCopy(seq);

  if(!feat)
    return ajFalse;

  seqout = ajSeqoutNew();

  if(!ajSeqoutOpenFilename(seqout,filename))
    embExitBad();

  ajSeqoutSetFormatS(seqout,ajStrNewC("genbank"));
  ajSeqoutWriteSeq(seqout,seq);
  ajSeqoutClose(seqout);
  ajSeqoutDel(&seqout);

  seqfile = ajFileNewInNameS(filename);
  ajSysFileUnlinkS(filename);

  featout = ajFeattabOutNew();

  if(!ajFeattabOutOpen(featout,filename))
    return ajFalse;

  ajFeattableWriteGenbank(featout,feat);

  ajFeattableDel(&feat);
  //ajFeattabOutDel(&featout);
  ajFileClose(&(featout->Handle));

  featfile = ajFileNewInNameS(filename);
  ajSysFileUnlinkS(filename);

  while(ajReadline(seqfile,&seqline)){
    if(ajStrMatchC(seqline,"ORIGIN\n")){
      while(ajReadline(featfile,&featline)){
	ajStrAppendS(inseq, featline);
      }
    }
    ajStrAppendS(inseq, seqline);
  }

  ajStrDel(&seqline);
  ajStrDel(&featline);
  ajStrDel(&filename);
  ajFileClose(&seqfile);
  ajFileClose(&featfile);

  return ajTrue;
}
コード例 #2
0
static void remap_read_file_of_enzyme_names(AjPStr *enzymes)
{
    AjPFile file = NULL;
    AjPStr line;
    const char *p = NULL;

    if(ajStrFindC(*enzymes, "@") == 0)
    {
	ajStrTrimC(enzymes, "@");	/* remove the @ */
	file = ajFileNewInNameS(*enzymes);
	if(file == NULL)
	    ajFatal("Cannot open the file of enzyme names: '%S'", enzymes);

	/* blank off the enzyme file name and replace with the enzyme names */
	ajStrSetClear(enzymes);
	line = ajStrNew();
	while(ajReadlineTrim(file, &line))
	{
	    p = ajStrGetPtr(line);
	    if(!*p || *p == '#' || *p == '!')
		continue;
	    ajStrAppendS(enzymes, line);
	    ajStrAppendC(enzymes, ",");
	}
	ajStrDel(&line);

	ajFileClose(&file);
    }

    return;
}
コード例 #3
0
static ajuint jaspscan_readmatrix(const AjPStr mfname, float ***matrix)
{

    AjPFile inf  = NULL;
    AjPStr line  = NULL;

    ajuint i = 0;
    ajuint cols = 0;

    AJCNEW0(*matrix,4);

    line = ajStrNew();

    inf = ajFileNewInNameS(mfname);
    if(!inf)
	ajFatal("Cannot open matrix file %S",mfname);

    i = 0;
    while(ajReadlineTrim(inf,&line))
    {
	if(!i)
	    cols = ajStrParseCountC(line," \n");

	(*matrix)[i++] = ajArrFloatLine(line," \n",1,cols);
    }
    


    ajStrDel(&line);
    ajFileClose(&inf);
    
    return cols;;
}
コード例 #4
0
static void jaspextract_readmatrixlist(AjPTable mtable, const AjPStr directory)
{
    const AjPStr datadir = NULL;

    AjPStr matrixfile = NULL;
    AjPFile inf = NULL;

    AjPStr line  = NULL;
    AjPStr key   = NULL;
    AjPStr value = NULL;
    
    matrixfile = ajStrNew();
    

    datadir = ajDatafileValuePath();
    if(!datadir)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");
    
    ajFmtPrintS(&matrixfile,"%S%s",directory,MATRIXFILE);

    if(!ajFilenameExistsRead(matrixfile))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found",directory);


    inf  = ajFileNewInNameS(matrixfile);
    if(!inf)
        ajFatal("Cannot open input file: %S",matrixfile);

    while(ajReadline(inf,&line))
    {
        key = ajStrNew();
        
        if(ajFmtScanS(line,"%S",&key) != 1)
        {
            ajStrDel(&key);
            continue;
        }

        value = ajStrNew();
        ajStrAssignS(&value,line);

        ajTablePut(mtable,(void *)key, (void *)value);
    }
    

    ajFileClose(&inf);
    
    ajStrDel(&matrixfile);
    ajStrDel(&line);
        
    return;
}
コード例 #5
0
ファイル: gfile.c プロジェクト: ktnyt/GEMBASSY
AjBool gGetFileContent(AjPStr* content, AjPStr filename){
  AjPFile file    = NULL;
  AjPStr  line    = NULL;

  if((file = ajFileNewInNameS(filename)) == NULL)
    return ajFalse;

  while(ajReadline(file, &line))
    ajStrAppendS(content, line);

  if(file)
    ajFileClose(&file);

  ajSysFileUnlinkS(filename);

  return ajTrue;
}
コード例 #6
0
static PMemFile dbiblast_memfopenfile(const AjPStr name)
{
    PMemFile ret;
    AjPFile fp;

    fp = ajFileNewInNameS(name);
    if(!fp)
	return NULL;

    AJNEW0(ret);

    ajStrAssignS(&ret->Name, name);
    ret->IsMem = 0;
    ret->File  = fp;
    ret->Size  = 0;
    ret->Mem   = NULL;

    ajDebug("fopened '%S'\n", name);

    return ret;
}
コード例 #7
0
ファイル: notseq.c プロジェクト: WenchaoLin/JAMg
static void notseq_readfile(const AjPStr exclude, AjPStr *pattern)
{
    AjPFile file = NULL;
    AjPStr line;
    AjPStr filename = NULL;
    const char *p = NULL;

    if(ajStrFindC(exclude, "@") != 0)
    {
	ajStrAssignS(pattern, exclude);
    }
    else
    {
	ajStrAssignS(&filename, exclude);
        ajStrTrimC(&filename, "@");       /* remove the @ */
        file = ajFileNewInNameS(filename);
        if(file == NULL)
            ajFatal("Cannot open the file of sequence names: '%S'", filename);

        /* blank off the file name and replace with the sequence names */
        ajStrSetClear(pattern);
        line = ajStrNew();
        while(ajReadlineTrim(file, &line))
        {
            p = ajStrGetPtr(line);

            if(!*p || *p == '#' || *p == '!')
		continue;

            ajStrAppendS(pattern, line);
            ajStrAppendC(pattern, ",");
        }
        ajStrDel(&line);
        ajStrDel(&filename);

        ajFileClose(&file);
    }

    return;
}
コード例 #8
0
ファイル: ghttp.c プロジェクト: ktnyt/GEMBASSY
AjPFilebuff gHttpPostFileSS(AjPStr url, AjPStr filename)
{
  AjPFilebuff buff = NULL;
  AjPFile     file = NULL;
  AjPStr      line = NULL;
  AjPStr      cont = NULL;
  AjPStr      host = NULL;
  AjPStr      path = NULL;
  AjPStr      post = NULL;
  AjPStr      body = NULL;
  ajint       port = 80;
  ajuint      http = 0;
  FILE       *fp;

  char crlf[] = "\015\021";

  AjOSysSocket sock;
  AjOSysTimeout timo;

  post = ajStrNew();
  body = ajStrNew();
  cont = ajStrNew();

  file = ajFileNewInNameS(filename);

  while(ajReadline(file, &line))
    {
      ajStrAppendS(&cont, line);
    }

  ajHttpUrlDeconstruct(url, &port, &host, &path);

  while(buff==NULL || ajHttpRedirect(buff, &host, &port, &path, &http))
    {
      if(ajStrGetCharFirst(path) != '/')
	ajStrInsertK(&path, 0, '/');

      ajFmtPrintS(
        &body,
        "--xYzZY\015\012"
        "Content-Disposition: form-data; name=\"file\";"
        " filename=\"%S\"\015\012"
        "Content-Type: text/plain\015\012"
        "%S\015\012"
        "\015\012--xYzZY--\015\012",
        filename, cont
      );

      ajFmtPrintS(
        &post,
        "POST http://%S%S\n"
        "Content-Length: %d\n"
        "Content-Type: multipart/form-data; boundary=xYzZY\n\n"
        "%S",
        host, path,
        ajStrGetLen(body), body
      );

      ajFmtPrint("%S", post);

      fp = ajHttpOpen(NULL, host, port, post, &sock);

      buff = ajFilebuffNewFromCfile(fp);

      if(!buff)
	return NULL;
    }

  ajStrDel(&post);

  timo.seconds = 180;
  ajSysTimeoutSet(&timo);
  ajFilebuffLoadAll(buff);
  ajSysTimeoutUnset(&timo);

  return buff;
}
コード例 #9
0
int main(int argc, char **argv)
{
    AjPList      sigin   = NULL;   /* Signature input file names.            */
    AjPStr       signame = NULL;   /* Name of signature file.                */
    AjPFile      sigf    = NULL;   /* Signature input file.                  */
    EmbPSignature sig    = NULL;   /* Signature.                             */
    AjPList      siglist = NULL;   /* List of signatures.                    */
    AjIList      sigiter = NULL;   /* Iterator for siglist.                  */
    AjBool       sigok  = ajFalse; /* True if signature processed ok.        */
    
    EmbPHit      hit = NULL;      /* Hit to store signature-sequence match.  */
    AjPList      hits = NULL;     /* List of hits */


    AjPList      ligands = NULL;     /* List of top-scoring ligands. */

    AjPSeqall    database=NULL;   /* Protein sequences to match signature 
				     against.                                */
    AjPSeq       seq = NULL;      /* Current sequence.                       */
    AjPMatrixf   sub  =NULL;      /* Residue substitution matrix.            */
    float        gapo =0.0;       /* Gap insertion penalty.                  */
    float        gape =0.0;       /* Gap extension penalty.                  */

    AjPStr        nterm=NULL;     /* Holds N-terminal matching options from 
				     acd.                                    */
    ajint         ntermi=0;        /* N-terminal option as int. */

    AjPFile      hitsf =NULL;     /* Hits output file.                       
				     sequence matches.                       */
    AjPDirout    hitsdir=NULL;    /* Directory of hits files (output).       */

    AjPFile      alignf =NULL;    /* Alignment output file.                  */
    AjPDirout    aligndir=NULL;   /* Directory of alignment files (output).  */

    
    AjPFile    resultsf =NULL;    /* Results file (output).  */
    AjPDirout  resultsdir=NULL;   /* Directory of results files (output).  */

    AjPStr  mode         = NULL;  /* Mode, 1: Patch score mode, 2:
				     Site score mode.  */
    ajint   modei        = 0;     /* Selected mode as integer.  */

    SigPLighit lighit   = NULL;

    embInitPV("sigscanlig", argc, argv, "SIGNATURE",VERSION);
    

    /* GET VALUES FROM ACD */
    sigin      = ajAcdGetDirlist("siginfilesdir");
    database   = ajAcdGetSeqall("dbseqall");
    sub        = ajAcdGetMatrixf("sub");
    gapo       = ajAcdGetFloat("gapo");
    gape       = ajAcdGetFloat("gape");
    nterm      = ajAcdGetListSingle("nterm");
    hitsdir    = ajAcdGetOutdir("hitsoutdir");
    aligndir   = ajAcdGetOutdir("alignoutdir"); 
    resultsdir = ajAcdGetOutdir("resultsoutdir"); 
    mode        = ajAcdGetListSingle("mode");



    /*Assign N-terminal matching option etc. */
    ajFmtScanS(nterm, "%d", &ntermi);
    modei       = (ajint) ajStrGetCharFirst(mode)-48;



    /* READ & PROCESS SIGNATURES */
    siglist = ajListNew();
    while(ajListPop(sigin, (void **) &signame))
    {
	/* Read signature files, compile signatures and populate list. */
	sigok = ajFalse;
	if((sigf = ajFileNewInNameS(signame)))
	    if((sig = embSignatureReadNew(sigf)))
		if(embSignatureCompile(&sig, gapo, gape, sub))
		{
		    sigok=ajTrue;
		    ajListPushAppend(siglist, sig);
		    /*
		    ajFmtPrint("Id: %S\nDomid: %S\nLigid: %S\nns: %d\n"
                               "sn: %d\nnp: %d\npn: %d\nminpatch: %d\n"
                               "maxgap: %d\n", 
			       sig->Id, sig->Domid, sig->Ligid, sig->ns,
                               sig->sn, sig->np, sig->pn, sig->minpatch,
                               sig->maxgap); */
		    

		}
	if(!sigok)
	{
	    ajWarn("Could not process %S", signame);
	    embSignatureDel(&sig);
	    ajFileClose(&sigf);
	    ajStrDel(&signame);
	    continue;
	}

	ajFileClose(&sigf);
	ajStrDel(&signame);
    }
    ajListFree(&sigin);

    
    
    /* ALIGN EACH QUERY SEQUENCE TO LIST OF SIGNATURE */
    while(ajSeqallNext(database, &seq))
    {
	/* Do sequence-signature alignment and save results */
	hits = ajListNew();
	sigiter = ajListIterNew(siglist);
	
	while((sig = (EmbPSignature) ajListIterGet(sigiter)))
	{
	    if(embSignatureAlignSeq(sig, seq, &hit, ntermi))
	    {
		hit->Sig = sig;
		
		ajListPushAppend(hits, hit);
		hit=NULL; /* To force reallocation by embSignatureAlignSeq */
	    }
	    /* There has to be a hit for each signature for correct
	       generation of the LHF by sigscanlig_WriteFasta. So push
	       an empty hit if necessary.  'hit'=NULL forces
	       reallocation by embSignatureAlignSeq. */
	    /*
	       else
	       {
		hit = embHitNew();
		ajListPushAppend(hits, hit);
		hit=NULL; 
		}
		*/
	}
	
	ajListIterDel(&sigiter);
	

	/* Rank-order the list of hits by score */
	ajListSort(hits, embMatchinvScore);

	
	/* Write ligand hits & alignment files (output)  */	
	hitsf    = ajFileNewOutNameDirS(ajSeqGetNameS(seq), hitsdir);
	alignf   = ajFileNewOutNameDirS(ajSeqGetNameS(seq), aligndir);
	resultsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), resultsdir);
	

	
	/* if((!sigscanlig_WriteFasta(hitsf, siglist, hits)))
	    ajFatal("Bad args to sigscanlig_WriteFasta"); */

	if((!sigscanlig_WriteFasta(hitsf, hits)))
	    ajFatal("Bad args to sigscanlig_WriteFasta");


    	if((!sigscanlig_SignatureAlignWriteBlock(alignf, hits)))
	    ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock");

    	/* if((!sigscanlig_SignatureAlignWriteBlock(alignf, siglist, hits)))
	    ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); */


	/* Sort list of hits by ligand type and site number.
	   Process list of ligands and print out. */
	ajListSortTwo(hits, embMatchLigid, embMatchSN);


	if(modei==1)
	    ligands = sigscanlig_score_ligands_patch(hits);
	else if(modei==2)
	    ligands = sigscanlig_score_ligands_site(hits);
	else 
	    ajFatal("Unrecognised mode");
	

	sigscanlig_WriteResults(ligands, resultsf);	
	

    	ajFileClose(&hitsf);
	ajFileClose(&alignf);
	ajFileClose(&resultsf);


	/* Memory management */
	while(ajListPop(hits, (void **) &hit))
	    embHitDel(&hit);
	ajListFree(&hits);

        while(ajListPop(ligands, (void **) &lighit))
            sigscanlig_LigHitDel(&lighit);
        ajListFree(&ligands);
    }	
    

    /* MEMORY MANAGEMENT */
    while(ajListPop(siglist, (void **) &sig))
	embSignatureDel(&sig);
    ajListFree(&siglist);

    ajSeqallDel(&database);
    ajMatrixfDel(&sub);
	
    ajStrDel(&nterm);    
    ajDiroutDel(&hitsdir);
    ajDiroutDel(&aligndir);
    ajDiroutDel(&resultsdir);
    ajStrDel(&mode);


    embExit();

    return 0;    
}
コード例 #10
0
ファイル: kmafft.c プロジェクト: BioinformaticsArchive/KBWS
    int main(int argc, char **argv) {

    embInitPV("kmafft", argc, argv, "KBWS", "1.0.8");

    struct soap soap;
    struct ns1__mafftInputParams params;
    char* jobid;
    char* result;

    AjPSeqall  seqall;
    AjPSeq     seq;
    AjPFile    outf;
    AjPStr     substr;
    AjPStr     inseq = NULL;
    AjPStr     strategy;
    AjPStr     outorder;
    float      op;
    float      ep;
    AjPStr     scorematrix;
    AjBool     homologs;
    AjBool     showhomologs;
    float      threshold;
    AjPStr     referenceseq;
    AjPStr     harrplot;

    strategy     =      ajAcdGetString("strategy");
    outorder     =      ajAcdGetString("outorder");
    op           =      ajAcdGetFloat("op");
    ep           =      ajAcdGetFloat("ep");
    scorematrix  =      ajAcdGetString("scorematrix");
    homologs     =      ajAcdGetBoolean("homologs");
    showhomologs =      ajAcdGetBoolean("showhomologs");
    threshold    =      ajAcdGetFloat("threshold");
    referenceseq =      ajAcdGetString("referenceseq");
    harrplot     =      ajAcdGetString("harrplot");

    seqall = ajAcdGetSeqall("seqall");
    outf   = ajAcdGetOutfile("outfile");

    params.strategy = ajCharNewS(strategy);
    params.outorder = ajCharNewS(outorder);
    params.op = op;
    params.ep = ep;
    params.scorematrix = ajCharNewS(scorematrix);
    if (homologs) {
      params.homologs = xsd__boolean__true_;
    } else {
      params.homologs = xsd__boolean__false_;
    }
    if (showhomologs) {
      params.showhomologs = xsd__boolean__true_;
    } else {
      params.showhomologs = xsd__boolean__false_;
    }
    params.threshold = threshold;
    params.referenceseq = ajCharNewS(referenceseq);
    params.harrplot = ajCharNewS(harrplot);

    AjPStr     tmp         = NULL;
    AjPStr     tmpFileName = NULL;
    AjPSeqout  fil_file;
    AjPStr     line        = NULL; /* if "AjPStr line; -> ajReadline is not success!" */
    AjPStr     sizestr     = NULL;
    ajint      thissize;

    ajint      nb       = 0;
    AjBool     are_prot = ajFalse;
    ajint      size     = 0;
    AjPFile    infile;

    tmp = ajStrNewC("fasta");

    fil_file = ajSeqoutNew();
    tmpFileName = getUniqueFileName();

    if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) {
        embExitBad();
    }

    ajSeqoutSetFormatS(fil_file, tmp);

    while (ajSeqallNext(seqall, &seq)) {
      if (!nb) {
        are_prot  = ajSeqIsProt(seq);
    }
      ajSeqoutWriteSeq(fil_file, seq);
      ++nb;
    }
    ajSeqoutClose(fil_file);
    ajSeqoutDel(&fil_file);

    if (nb < 2) {
        ajFatal("Multiple alignments need at least two sequences");
    }

    infile = ajFileNewInNameS(tmpFileName);

    while (ajReadline(infile, &line)) {
      ajStrAppendS(&inseq,line);
      ajStrAppendC(&inseq,"\n");
    }

    soap_init(&soap);

    char* in0;
    in0 = ajCharNewS(inseq);
    if ( soap_call_ns1__runMafft( &soap, NULL, NULL, in0, &params, &jobid ) == SOAP_OK ) {
      fprintf(stderr,"Jobid: %s\n",jobid);
    } else {
      soap_print_fault(&soap, stderr);
    }

    int check = 0;
    while ( check == 0 ) {
      if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid,  &check ) == SOAP_OK ) {
        fprintf(stderr,"*");
      } else {
        soap_print_fault(&soap, stderr);
      }
      sleep(3);
    }

    fprintf(stderr,"\n");

    if ( soap_call_ns1__getResult( &soap, NULL, NULL, jobid,  &result ) == SOAP_OK ) {
      substr = ajStrNewC(result);
      ajFmtPrintF(outf,"%S\n",substr);
    } else {
      soap_print_fault(&soap, stderr);
    }

    ajSysFileUnlinkS(tmpFileName);

    soap_destroy(&soap);
    soap_end(&soap);
    soap_done(&soap);

    ajFileClose(&outf);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajStrDel(&substr);

    embExit();

    return 0;
}
コード例 #11
0
ファイル: kweblogo.c プロジェクト: agustin-avila/KBWS
int main(int argc, char **argv) {
  // initialize EMBASSY info
  embInitPV("kweblogo", argc, argv, "KBWS", "1.0.9");

  // soap driver and parameter object
  struct soap soap;
  struct ns1__weblogoInputParams params;

  char* jobid;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    substr;
  AjPStr    inseq = NULL;

  // get input sequence
  seqall= ajAcdGetSeqall("seqall");

  // get/set parameters
  params.format = ajCharNewS(ajAcdGetString("format"));

  AjPStr     tmp= NULL;
  AjPStr     tmpFileName= NULL;
  AjPSeqout  fil_file;
  AjPStr     line= NULL; /* if "AjPStr line; -> ajReadline is not success!" */
  AjPStr sizestr= NULL;
  ajint thissize;

  ajint   nb= 0;
  AjBool  are_prot= ajFalse;
  ajint   size= 0;
  AjPFile infile;

  AjPFile goutf;
  AjPStr  goutfile;

  goutfile= ajAcdGetString("goutfile");

  tmp= ajStrNewC("fasta");

  fil_file= ajSeqoutNew();
  tmpFileName= getUniqueFileName();

  if(!ajSeqoutOpenFilename(fil_file, tmpFileName)) {
    embExitBad();
  }

  ajSeqoutSetFormatS(fil_file, tmp);

  while (ajSeqallNext(seqall, &seq)) {
    if (!nb) {
      are_prot  = ajSeqIsProt(seq);
    }
    ajSeqoutWriteSeq(fil_file, seq);
    ++nb;
  }
  ajSeqoutClose(fil_file);
  ajSeqoutDel(&fil_file);

  if (nb < 2) {
    ajFatal("Multiple alignments need at least two sequences");
  }

  infile = ajFileNewInNameS(tmpFileName);

  while (ajReadline(infile, &line)) {
    ajStrAppendS(&inseq,line);
    ajStrAppendC(&inseq,"\n");
  }

  soap_init(&soap);

  char* in0;
  in0= ajCharNewS(inseq);
  if (soap_call_ns1__runWeblogo( &soap, NULL, NULL, in0, &params, &jobid) == SOAP_OK) {
  } else {
    soap_print_fault(&soap, stderr);
  }

  int check= 0;
  while (check == 0 ) {
    if (soap_call_ns1__checkStatus(&soap, NULL, NULL, jobid,  &check) == SOAP_OK) {
    } else {
      soap_print_fault(&soap, stderr);
    }
    sleep(3);
  }

  char* image_url;
  if (soap_call_ns1__getResult(&soap, NULL, NULL, jobid,  &image_url) == SOAP_OK) {
    goutf= ajFileNewOutNameS(goutfile);

    if (!goutf) {
      // can not open image output file
      ajFmtError("Problem writing out image file");
      embExitBad();
    }

    if (!gHttpGetBinC(image_url, &goutf)) {
      // can not download image file
      ajFmtError("Problem downloading image file");
      embExitBad();
    }
  } else {
    soap_print_fault(&soap, stderr);
  }

  // delete temporary multi-fasta sequence file
  ajSysFileUnlinkS(tmpFileName);

  // destruct SOAP driver
  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

  // destruct EMBOSS object
  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&substr);

  // exit
  embExit();

  return 0;
}
コード例 #12
0
int main(int argc, char **argv)
{
    EmbPBtreeEntry entry = NULL;
    
    AjPStr dbname   = NULL;
    AjPStr dbrs     = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjBool statistics;
    AjBool compressed;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr dbtype = NULL;
    AjPFile outf = NULL;

    AjPStr *fieldarray = NULL;
    
    ajint nfields;
    ajint nfiles;

    AjPStr tmpstr = NULL;
    AjPStr thysfile = NULL;
    
    ajint i;
    AjPFile inf = NULL;

    AjPStr word = NULL;
    
    AjPBtId  idobj  = NULL;
    AjPBtPri priobj = NULL;
    AjPBtHybrid hyb = NULL;
    
    ajulong nentries = 0L;
    ajulong ientries = 0L;
    AjPTime starttime = NULL;
    AjPTime begintime = NULL;
    AjPTime nowtime = NULL;
    ajlong startclock = 0;
    ajlong beginclock = 0;
    ajlong nowclock = 0;
    
    ajulong idcache=0L, idread = 0L, idwrite = 0L, idsize= 0L;
    ajulong accache=0L, acread = 0L, acwrite = 0L, acsize= 0L;
    ajulong svcache=0L, svread = 0L, svwrite = 0L, svsize= 0L;
    ajulong kwcache=0L, kwread = 0L, kwwrite = 0L, kwsize= 0L;
    ajulong decache=0L, deread = 0L, dewrite = 0L, desize= 0L;
    ajulong txcache=0L, txread = 0L, txwrite = 0L, txsize= 0L;

    double tdiff = 0.0;
    ajint days = 0;
    ajint hours = 0;
    ajint mins = 0;
    
    embInit("dbxflat", argc, argv);

    dbtype     = ajAcdGetListSingle("idformat");
    fieldarray = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    outf       = ajAcdGetOutfile("outfile");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    dbrs       = ajAcdGetString("dbresource");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    statistics = ajAcdGetBoolean("statistics");
    compressed = ajAcdGetBoolean("compressed");

    entry = embBtreeEntryNew();
    if(compressed)
        embBtreeEntrySetCompressed(entry);

    tmpstr = ajStrNew();
    
    idobj   = ajBtreeIdNew();
    priobj  = ajBtreePriNew();
    hyb     = ajBtreeHybNew();
    

    nfields = embBtreeSetFields(entry,fieldarray);
    embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory,
		      indexdir);

    for(i=0; i< nfields; i++)
    {
        if(ajStrMatchC(fieldarray[i], "acc"))
        {
            accfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(accfield);
        }
        else if(ajStrMatchC(fieldarray[i], "sv"))
        {
            svfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(svfield);
        }
        else if(ajStrMatchC(fieldarray[i], "des"))
        {
            desfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(desfield);
        }
        else if(ajStrMatchC(fieldarray[i], "key"))
        {
            keyfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(keyfield);
        }
        else if(ajStrMatchC(fieldarray[i], "org"))
        {
            orgfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(orgfield);
        }
        else if(!ajStrMatchC(fieldarray[i], "id"))
            ajErr("Unknown field '%S' specified for indexing", fieldarray[i]);
    }

    embBtreeGetRsInfo(entry);

    nfiles = embBtreeGetFiles(entry,directory,filename,exclude);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embBtreeWriteEntryFile(entry);

    embBtreeOpenCaches(entry);

    starttime = ajTimeNewToday();

    ajFmtPrintF(outf, "Processing directory: %S\n", directory);

    for(i=0;i<nfiles;++i)
    {
        begintime = ajTimeNewToday();
        beginclock = ajClockNow();

	ajListPop(entry->files,(void **)&thysfile);
	ajListPushAppend(entry->files,(void *)thysfile);
	ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile);
	if(!(inf=ajFileNewInNameS(tmpstr)))
	    ajFatal("Cannot open input file %S\n",tmpstr);
	ajFilenameTrimPath(&tmpstr);
	ajFmtPrintF(outf,"Processing file: %S",tmpstr);

	ientries = 0L;

	while(dbxflat_NextEntry(entry,inf))
	{
	    ++ientries;

	    if(entry->do_id)
	    {
                if(ajStrGetLen(entry->id) > entry->idlen)
                {
                    if(ajStrGetLen(entry->id) > maxidlen)
                    {
                        ajWarn("id '%S' too long, truncating to idlen %d",
                               entry->id, entry->idlen);
                        maxidlen = ajStrGetLen(entry->id);
                    }
                    idtrunc++;
                    ajStrKeepRange(&entry->id,0,entry->idlen-1);
                }
    
		ajStrFmtLower(&entry->id);
		ajStrAssignS(&hyb->key1,entry->id);
		hyb->dbno = i;
		hyb->offset = entry->fpos;
		hyb->dups = 0;
		ajBtreeHybInsertId(entry->idcache,hyb);
                ++idtot;
	    }

	    if(accfield)
	    {
                while(ajListPop(accfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(accfield->cache,hyb);
                    ++acctot;
		    ajStrDel(&word);
                }
	    }

	    if(svfield)
	    {
                while(ajListPop(svfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(svfield->cache,hyb);
                    ++svtot;
		    ajStrDel(&word);
                }
	    }

	    if(keyfield)
	    {
                while(ajListPop(keyfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(keyfield->cache, priobj);
                    ++keytot;
		    ajStrDel(&word);
                }
	    }

	    if(desfield)
	    {
                while(ajListPop(desfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    if(ajBtreeInsertKeyword(desfield->cache, priobj))
                        ++destot;
		    ajStrDel(&word);
                }
	    }

	    if(orgfield)
	    {
                while(ajListPop(orgfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(orgfield->cache, priobj);
                    ++orgtot;
		    ajStrDel(&word);
                }
	    }
	}
	
	ajFileClose(&inf);
	nentries += ientries;
	nowtime = ajTimeNewToday();
        nowclock = ajClockNow();
	ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n",
		    nentries, ientries,
		    ajClockDiff(startclock,nowclock),
                    ajTimeDiff(starttime, nowtime),
		    ajClockDiff(beginclock,nowclock),
                    ajTimeDiff(begintime, nowtime));
        if(statistics)
        {
            if(entry->do_id)
                ajBtreeCacheStatsOut(outf, entry->idcache,
                                     &idcache, &idread, &idwrite, &idsize);
            if(accfield)
                ajBtreeCacheStatsOut(outf, accfield->cache,
                                     &accache, &acread, &acwrite, &acsize);
            if(svfield)
                ajBtreeCacheStatsOut(outf, svfield->cache,
                                     &svcache, &svread, &svwrite, &svsize);
            if(keyfield)
                ajBtreeCacheStatsOut(outf, keyfield->cache,
                                     &kwcache, &kwread, &kwwrite, &kwsize);
            if(desfield)
                ajBtreeCacheStatsOut(outf, desfield->cache,
                                     &decache, &deread, &dewrite, &desize);
            if(orgfield)
                ajBtreeCacheStatsOut(outf, orgfield->cache,
                                     &txcache, &txread, &txwrite, &txsize);
        }

	ajTimeDel(&begintime);
	ajTimeDel(&nowtime);
    }
    


    embBtreeDumpParameters(entry);
    embBtreeCloseCaches(entry);
    
    nowtime = ajTimeNewToday();
    tdiff = ajTimeDiff(starttime, nowtime);
    days = (ajint) (tdiff/(24.0*3600.0));
    tdiff -= (24.0*3600.0)*(double)days;
    hours = (ajint) (tdiff/3600.0);
    tdiff -= 3600.0*(double)hours;
    mins = (ajint) (tdiff/60.0);
    tdiff -= 60.0 * (double) mins;
    if(days)
        ajFmtPrintF(outf, "Total time: %d %02d:%02d:%04.1f\n",
                    days, hours, mins, tdiff);
    else if (hours)
        ajFmtPrintF(outf, "Total time: %d:%02d:%04.1f\n",
                    hours, mins, tdiff);
    else 
        ajFmtPrintF(outf, "Total time: %d:%04.1f\n",
                    mins, tdiff);
    ajTimeDel(&nowtime);
    ajTimeDel(&starttime);

    if(maxidlen)
    {
        ajFmtPrintF(outf,
                    "Resource idlen truncated %u IDs. "
                    "Maximum ID length was %u.",
                    idtrunc, maxidlen);
        ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.",
               idtrunc, maxidlen);
    }

    ajFileClose(&outf);
    embBtreeEntryDel(&entry);
    ajStrDel(&tmpstr);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&dbrs);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&dbtype);    

    nfields = 0;
    while(fieldarray[nfields])
	ajStrDel(&fieldarray[nfields++]);
    AJFREE(fieldarray);


    ajBtreeIdDel(&idobj);
    ajBtreePriDel(&priobj);
    ajBtreeHybDel(&hyb);

    ajRegFree(&dbxflat_wrdexp);
    embExit();

    return 0;
}
コード例 #13
0
int main(int argc, char **argv)
{
    EmbPBtreeEntry entry = NULL;
    
    AjPStr dbname   = NULL;
    AjPStr dbrs     = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr dbtype = NULL;
    AjPFile outf = NULL;

    AjPStr *fieldarray = NULL;
    
    ajint nfields;
    ajint nfiles;

    AjPStr tmpstr = NULL;
    AjPStr thysfile = NULL;
    
    ajint i;
    AjPFile inf = NULL;

    AjPStr word = NULL;
    
    AjPBtId  idobj  = NULL;
    AjPBtPri priobj = NULL;
    AjPBtHybrid hyb = NULL;
    
    ajulong nentries = 0L;
    ajulong ientries = 0L;
    AjPTime starttime = NULL;
    AjPTime begintime = NULL;
    AjPTime nowtime = NULL;
    ajlong startclock = 0;
    ajlong beginclock = 0;
    ajlong nowclock = 0;
    
    embInit("dbxflat", argc, argv);

    dbtype     = ajAcdGetListSingle("idformat");
    fieldarray = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    outf       = ajAcdGetOutfile("outfile");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    dbrs       = ajAcdGetString("dbresource");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");

    entry = embBtreeEntryNew();
    tmpstr = ajStrNew();
    
    idobj   = ajBtreeIdNew();
    priobj  = ajBtreePriNew();
    hyb     = ajBtreeHybNew();
    

    nfields = embBtreeSetFields(entry,fieldarray);
    embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory,
		      indexdir);

    embBtreeGetRsInfo(entry);

    nfiles = embBtreeGetFiles(entry,directory,filename,exclude);
    embBtreeWriteEntryFile(entry);

    embBtreeOpenCaches(entry);

    starttime = ajTimeNewToday();

    ajFmtPrintF(outf, "Processing directory: %S\n", directory);

    for(i=0;i<nfiles;++i)
    {
        begintime = ajTimeNewToday();
        beginclock = ajClockNow();

	ajListPop(entry->files,(void **)&thysfile);
	ajListPushAppend(entry->files,(void *)thysfile);
	ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile);
	if(!(inf=ajFileNewInNameS(tmpstr)))
	    ajFatal("Cannot open input file %S\n",tmpstr);
	ajFilenameTrimPath(&tmpstr);
	ajFmtPrintF(outf,"Processing file: %S",tmpstr);

	ientries = 0L;

	while(dbxflat_NextEntry(entry,inf))
	{
	    ++ientries;
	    if(entry->do_id)
	    {
                if(ajStrGetLen(entry->id) > entry->idlen)
                {
                    if(ajStrGetLen(entry->id) > maxidlen)
                    {
                        ajWarn("id '%S' too long, truncating to idlen %d",
                               entry->id, entry->idlen);
                        maxidlen = ajStrGetLen(entry->id);
                    }
                    idtrunc++;
                    ajStrKeepRange(&entry->id,0,entry->idlen-1);
                }
    
		ajStrFmtLower(&entry->id);
		ajStrAssignS(&hyb->key1,entry->id);
		hyb->dbno = i;
		hyb->offset = entry->fpos;
		hyb->dups = 0;
		ajBtreeHybInsertId(entry->idcache,hyb);
	    }

	    if(entry->do_accession)
	    {
                while(ajListPop(entry->ac,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(entry->accache,hyb);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_sv)
	    {
                while(ajListPop(entry->sv,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(entry->svcache,hyb);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_keyword)
	    {
                while(ajListPop(entry->kw,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(entry->kwcache, priobj);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_description)
	    {
                while(ajListPop(entry->de,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(entry->decache, priobj);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_taxonomy)
	    {
                while(ajListPop(entry->tx,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(entry->txcache, priobj);
		    ajStrDel(&word);
                }
	    }
	}
	
	ajFileClose(&inf);
	nentries += ientries;
	nowtime = ajTimeNewToday();
        nowclock = ajClockNow();
	ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n",
		    nentries, ientries,
		    ajClockDiff(startclock,nowclock),
                    ajTimeDiff(starttime, nowtime),
		    ajClockDiff(beginclock,nowclock),
                    ajTimeDiff(begintime, nowtime));
	ajTimeDel(&begintime);
	ajTimeDel(&nowtime);
    }
    


    embBtreeDumpParameters(entry);
    embBtreeCloseCaches(entry);
    
    nowtime = ajTimeNewToday();
    ajFmtPrintF(outf, "Total time: %.1fs\n", ajTimeDiff(starttime, nowtime));
    ajTimeDel(&nowtime);
    ajTimeDel(&starttime);

    if(maxidlen)
    {
        ajFmtPrintF(outf,
                    "Resource idlen truncated %u IDs. "
                    "Maximum ID length was %u.",
                    idtrunc, maxidlen);
        ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.",
               idtrunc, maxidlen);
    }
    
    ajFileClose(&outf);
    embBtreeEntryDel(&entry);
    ajStrDel(&tmpstr);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&dbrs);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&dbtype);    

    nfields = 0;
    while(fieldarray[nfields])
	ajStrDel(&fieldarray[nfields++]);
    AJFREE(fieldarray);


    ajBtreeIdDel(&idobj);
    ajBtreePriDel(&priobj);
    ajBtreeHybDel(&hyb);

    ajRegFree(&dbxflat_wrdexp);
    embExit();

    return 0;
}
コード例 #14
0
int main(ajint argc, char **argv)
{
  /* Variable declarations */
  AjPFile   inf_edam   = NULL;  /* Name of EDAM data (input) file   */
  AjPFile   acdoutf    = NULL;  /* Name of ACD (output) file        */
  
  AjPList   acdinlist  = NULL;  /* List of ACD file names (input)   */
  AjPFile   acdinf     = NULL;  /* Name of ACD (input) file         */
  AjPStr    acdname    = NULL;  /* Name of current acd file         */
  AjPDirout acdoutdir  = NULL;  /* Directory for ACD files (output) */

  AjPFile   inf_ktype  = NULL;  /* Name of knowntypes.standard file */
  
  PEdam   edam         = NULL;  /* EDAM relations data              */
  PKtype  ktype        = NULL;  /* Data from knowntype.standard     */


  
  /* Read data from acd. */
  embInitP("acdrelations",argc,argv,"MYEMBOSS");
    
  /* ACD data handling */
  inf_edam   = ajAcdGetDatafile("infileedam");
  inf_ktype  = ajAcdGetInfile("infiletype");
  acdinlist  = ajAcdGetDirlist("indir");  
  acdoutdir  = ajAcdGetOutdir("outdir");
  
  /* Read data file */  
  edam  = ajEdamNew();
  ktype = ajKtypeNew();
    
  acdrelations_readdatfile(inf_edam, &edam);
  acdrelations_readtypefile(inf_ktype, &ktype);


  /*  Main application loop. Process each ACD file in turn.  */
  while(ajListPop(acdinlist,(void **)&acdname))
  {
      if(!(acdinf = ajFileNewInNameS(acdname)))   
          ajFatal("Cannot open input ACD file %S\n", acdname);
      
      ajFilenameTrimPath(&acdname);
            
      if(!(acdoutf = ajFileNewOutNameDirS(acdname, acdoutdir)))
          ajFatal("Cannot open output ACD file %S\n", acdname);

      acdrelations_procacdfile(acdinf, acdoutf, edam, ktype);
      
      ajFileClose(&acdinf);
      ajFileClose(&acdoutf);
  }
  
  /* Clean up and exit */
  ajFileClose(&inf_edam);
  ajFileClose(&inf_ktype);
  ajListFree(&acdinlist);
  ajDiroutDel(&acdoutdir);

  ajEdamDel(&edam);

  ajExit();
  return 0;
}
コード例 #15
0
int main(ajint argc, char **argv)
{
    AjPList  ccfin        = NULL;  /* List of CCF (input) files.             */

    AjPDir   pdbin        = NULL;  /* Path of pdb input files.               */
    AjPStr   pdbprefix    = NULL;  /* Prefix of pdb input files.             */
    AjPStr   pdb_name     = NULL;  /* Full name (path/name/extension) of 
					 pdb format input file.              */

    AjPDirout ccfout     = NULL;   /* Path of coordinate output file.        */
    AjPStr   randomname  = NULL;   /* Name for temp file tempf.              */
    AjPStr   ccf_this    = NULL; 
    AjPStr   exec        = NULL; 
    AjPStr   naccess_str = NULL; 
    AjPStr   line        = NULL;
    AjPStr   syscmd      = NULL;   /* Command line arguments.                */
    AjPStr  *mode        = NULL;   /* Mode of operation from acd.            */

    AjPFile  errf        = NULL;   /* pdbplus error file pointer.            */
    AjPFile  serrf       = NULL;   /* stride error file pointer.             */
    AjPFile  nerrf       = NULL;   /* stride error file pointer.             */
    AjPFile  tempf       = NULL;   /* Temp file for holding STRIDE output.   */
    AjPFile  ccf_inf     = NULL;   /* Protein coordinate input file.         */
    AjPFile  ccf_outf    = NULL;   /* Protein coordinate output file.        */

    AjIList  iter        = NULL; 

    AjBool   done_naccess= ajFalse;
    AjBool   done_stride = ajFalse;
    AjBool   found       = ajFalse;
    AjPResidue temp_res  = NULL;  /* Pointer to Residue object.                */
    AjPPdb   pdb_old     = NULL;  /* Pointer to PDB object - without new
				     stride elements.                       */
    AjPPdb   pdb         = NULL;  /* Pointer to PDB object.                 */
    ajint    idn         = 0;     /* Chain identifier as a number (1,2,...) */
    ajint    chain_num   = 0;     /* Chain identifier index (0,1,...).      */
    ajint    tS          = 0;     /* User-defined threshold size for SSEs.  */
    ajint    nostride    = 0;     /* No. times stride failed                */
    ajint    nonaccess   = 0;     /* No. times naccess failed               */
    ajint    nofile      = 0;     /* No. times of file error                */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr   res      = NULL;  /* Residue id from STRIDE ASG line (ALA etc). */
    AjPStr   res_num  = NULL;  /* PDB residue number from STRIDE ASG line.   */
    char     pcid     = ' ';   /* Protein chain identifier from STRIDE or 
				  NACESS output (A,B, etc).                  */
    char     ss       = ' ';   /* One-letter secondary structure code from 
				  STRIDE ASG line.                           */
    float    ph       = 0.0;   /* Phi angle from STRIDE ASG line.            */
    float    ps       = 0.0;   /* Psi angle from STRIDE ASG line.            */
    float    sa       = 0.0;   /* Residue solvent accessible area from STRIDE 
				  ASG line.                                  */
    float    f1       = 0;
    float    f2       = 0;
    float    f3       = 0;
    float    f4       = 0;
    float    f5       = 0;
    float    f6       = 0;
    float    f7       = 0;
    float    f8       = 0;
    float    f9       = 0;
    float    f10      = 0;





    /* Allocate strings; this section is used for variables that are 
       allocated once only. */
    pdb_name       = ajStrNew();
    res            = ajStrNew();
    res_num        = ajStrNew();
    randomname     = ajStrNew();
    syscmd         = ajStrNew();
    line           = ajStrNew();  
    naccess_str    = ajStrNew();
    exec           = ajStrNew();





    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin        = ajAcdGetDirlist("ccfinpath");  
    pdbin        = ajAcdGetDirectory("pdbindir"); 
    pdbprefix    = ajAcdGetString("pdbprefix");
    ccfout       = ajAcdGetOutdir("ccfoutdir");
    mode         = ajAcdGetList("mode");
    errf         = ajAcdGetOutfile("logfile");
    if(ajStrGetCharFirst(*mode) != '2')
	serrf    = ajAcdGetOutfile("slogfile");
    if(ajStrGetCharFirst(*mode) != '1')
	nerrf    = ajAcdGetOutfile("nlogfile");
    tS           = ajAcdGetInt("thresholdsize");
 

    


    
    ajRandomSeed();
    ajFilenameSetTempname(&randomname); 




    
    /* 
     **  Start of main application loop. 
     **  Process each PDB/ protein coordinate file (EMBL format) in turn. 
     */ 
    
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file.  If it cannot be opened, write a 
           message to the error file, delete ccf_this and continue. */

        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)   
	{
	    ajWarn("%s%S\n//\n", 
		   "clean coordinate file not found: ", ccf_this);
	    
	    ajFmtPrintF(errf, "%s%S\n//\n", 
                        "clean coordinate file not found: ", ccf_this); 
            ajStrDel(&ccf_this); 
	    nofile++;
	    continue; 
        }       

        ajFmtPrint("Processing %S\n", ccf_this);
	fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into 
	   AjPPdb object.  ajPdbReadAllModelsNew will create the AjPPdb object. */
      if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
	    ajWarn("ERROR Clean coordinate file read" 
		   "error: %S\n//\n", ccf_this);
            ajFmtPrintF(errf, "ERROR Clean coordinate file read" 
			"error: %S\n//\n", ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this); 
	    nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old); 
        ajPdbDel(&pdb_old); 

        /* Construct name of corresponding PDB file.
	    NACCESS does *not* generate an output file if the path is './' e.g. 
	    naccess ./1rbp.ent , therefore replace './' with null. */
	ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));
	if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
	    ajStrAssignC(&pdb_name, "");
	
        ajStrAppendS(&pdb_name, pdbprefix);
	ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
	ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));
	

        /* Check corresponding PDB file exists for reading using ajFileStat. */
	if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this); 
            ajPdbDel(&pdb);
	    nofile++;
            continue;
        }
        
	if(ajStrGetCharFirst(*mode) != '2')
        {        
	    /* 
	     **  Create a string containing the STRIDE command line (it needs
	     **  PDB file name & name of temp output file).
	     **  Call STRIDE by using ajSystem.
	     */
	    
	    ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",  
			ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
	    ajFmtPrint("%S %S -f%S >> %s 2>&1\n",  
		       ajAcdGetpathC("stride"),
                       pdb_name, randomname,ajFileGetNameC(serrf));
	    system(ajStrGetPtr(syscmd));  

	    
	    /* Open the stride output file */
	    if (((tempf = ajFileNewInNameS(randomname)) == NULL))
	    {
		ajWarn("%s%S\n//\n", 
		       "no stride output for: ", pdb_name); 
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no stride output for: ", pdb_name); 
		nostride++;
		ajStrDel(&ccf_this);
		ajPdbDel(&pdb); 
		continue; 
	    } 
	    else
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "stride output for: ", pdb_name); 

	    
	    done_stride = ajFalse;

	    /* Parse STRIDE output from temp output file a line at a time. */
	    while(ajReadlineTrim(tempf,&line))
	    {       
		if(ajStrPrefixC(line,"ASG"))    
		{
		    ajFmtScanS(line, "%*S %S  %c %S %*d %c %*S %f %f %f %*S", 
			       &res, &pcid, &res_num, &ss, &ph, &ps, &sa);
                
		    /* 
		     **  Populate pdbplus object with the data from this parsed
		     **  line. This means first identifying the chain, then 
		     **  finding the residue. 
		     */
                
		    /* Determine the chain number. ajDmxPdbplusChain does not 
		       recognise '-', so change '-' to '.'  */
		    if (pcid == '-')
			pcid = '.'; 

		    /* Get chain number from the chain identifier. */
		    if(!ajPdbChnidToNum(pcid, pdb, &idn)) 
		    {
			ajWarn("Could not convert chain id %c to chain"
			       " number in pdb file %S\n//\n", 
			       pcid, pdb_name);
			ajFmtPrintF(errf, "Could not convert chain id %c "
				    "to chain number in pdb file %S\n//\n", 
				    pcid, pdb_name);
			continue;
		    }
                    
		    /* 
		     **  The chain number that will get written starts at 1, but
		     **  we want an index into an array which must start at 0, 
		     **  so subtract 1 from the chain number to get the index. 
		     */
		    chain_num = idn-1; 
                  
		    /* 
		     **   Iiterate through the list of residues in the Pdb object,
		     **   found switches to true when first residue corresponding 
		     **   to the line is found. 
		     */

		    /* iter = ajListIterNewread(pdb->Chains[chain_num]->Atoms); */
		    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
		    found = ajFalse; 

		    while((temp_res = (AjPResidue)ajListIterGet(iter)))
		    {
		        /* If we have found the residue we want */
			if((ajStrMatchS(res_num, temp_res->Pdb) && 
			    ajStrMatchS(res, temp_res->Id3)))
			{
                       	    done_stride = ajTrue;
			    found = ajTrue;
			    temp_res->eStrideType = ss;
			    temp_res->Phi  = ph;
			    temp_res->Psi  = ps;
			    temp_res->Area = sa;
			}                 
			/* If the matching residue has been processed
			   move on to next ASG line, next residue. */
			else if(found == ajTrue) 
			    break;	
			else 
			/* Matching residue not found yet. */       
			    continue;	
		    }
		    ajListIterDel(&iter);
		} /* End of if ASG loop. */ 
	    } /* End of while line loop. */
	    

	    if(done_stride)
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "stride data for: ", pdb_name); 
	    else
	      {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no stride data for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
		nostride++;
	      }


	    /* Close STRIDE temp file. & tidy up. */
	    ajFileClose(&tempf);

	    /* Remove temporary file (stride output file). */
	    ajFmtPrintS(&exec, "rm %S", randomname); 
	    ajSysSystem(exec); 
	    
	    /* 
	     **  Calculate element serial numbers (eStrideNum)& ammend residue
	     **  objects, count no's of elements and ammend chain object 
	     **  (numHelices, num Strands). 
	     */
	    pdbplus_sort(pdb, tS);
	}
	

	if(ajStrGetCharFirst(*mode) != '1')
        {        
	    /* 
	     **   Create a string containing the NACCESS command line (it needs
	     **   PDB file name & name of temp output file) & call NACCESS.
	     **   If e.g. /data/structure/pdbfred.ent was parsed and the program
	     **   was run from /stuff, then /stuff/fred.asa and /stuff/fred.rsa
	     **   would be written.  These must be deleted once parsed (only
	     **   use the .rsa file here). 
	     */
	    
	    ajFmtPrintS(&syscmd, "%S %S  >> %s 2>&1",  
			ajAcdGetpathC("naccess"), pdb_name, 
			ajFileGetNameC(nerrf));
	    ajFmtPrint("%S %S  >> %s 2>&1\n",  
		       ajAcdGetpathC("naccess"), pdb_name, 
		       ajFileGetNameC(nerrf));
	    system(ajStrGetPtr(syscmd));  


	    
	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".rsa");
	    
	    /* Open the NACCESS output file. */
	    if (((tempf = ajFileNewInNameS(naccess_str)) == NULL))
	    {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no naccess output for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
		nonaccess++;
		ajStrDel(&ccf_this);
		ajPdbDel(&pdb); 
		continue; 
	    }	 
	    else
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "naccess output for: ", pdb_name); 


	    done_naccess = ajFalse;
	    /* Parse NACCESS output from temp output file a line at a time. */	    
	    while(ajReadlineTrim(tempf,&line))
	    {       
		if(ajStrPrefixC(line,"RES"))    
		{
		    /* Read data from lines. */
		    if((pcid = line->Ptr[8]) == ' ')
		      ajFmtScanS(line, "%*S %S %S %f %f %f "
				 "%f %f %f %f %f %f %f", 
				 &res, &res_num, &f1, &f2, &f3, &f4, &f5, 
				 &f6, &f7, &f8, &f9, &f10);
		    else
		      ajFmtScanS(line, "%*S %S %*c %S %f %f "
				 "%f %f %f %f %f %f %f %f", 
				 &res, &res_num, &f1, &f2, &f3, &f4, &f5, 
				 &f6, &f7, &f8, &f9, &f10);

		    /* Identify the chain, then finding all the residues 
		       corresponding to the residue. */
                
		    /* Get the chain number from the chain identifier. */
		    if(!ajPdbChnidToNum(pcid, pdb, &idn))
		    {
                        ajWarn("Could not convert chain id %c to chain"
				    " number in pdb file %S\n//\n", 
			       pcid, pdb_name);	
			ajFmtPrintF(errf, "Could not convert chain id"
				    " %c to chain number in pdb file %S\n//\n",
				    pcid, pdb_name);
			continue;
		    }
                    

                  
		    /* 
		     **  Chain number will start at 1, but we want an index 
		     **  into an array which must start at 0, so subtract 1 
		     **  from the chain number to get the index.
		     */
		    chain_num = idn-1; 



		    /* 
		     **   Iiterate through the list of residues in the Pdb object,
		     **   temp_res is an AjPResidue used to point to the current
		     **   residue.
		     **   ajBool found switches to true when first residue 
		     **   corresponding to the line is found. 
		     */
		    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);

		    found = ajFalse; 
		    while((temp_res = (AjPResidue)ajListIterGet(iter)))
		    {
			/* If we have found the residue we want, write the residue 
			   object. */
			if((ajStrMatchS(res_num, temp_res->Pdb) && 
			    ajStrMatchS(res, temp_res->Id3)))
                        {
			    found = ajTrue;
			    done_naccess = ajTrue;
			    temp_res->all_abs  = f1;
			    temp_res->all_rel  = f2;
			    temp_res->side_abs = f3;
			    temp_res->side_rel = f4;
			    temp_res->main_abs = f5;
			    temp_res->main_rel = f6;
			    temp_res->npol_abs = f7;
			    temp_res->npol_rel = f8;
			    temp_res->pol_abs  = f9;
			    temp_res->pol_rel  = f10;

			}      
			/* If the matching residues have all been processed. 
			   move on to next ASG line, next residue. */
			else if(found == ajTrue) 
			    break;	
			else 
			    /* Matching residues not found yet, move on to next 
			       residue. */
			    continue;	 
		    }
		    ajListIterDel(&iter);
		} 
	    } 
	    
	    if(done_naccess)
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "naccess data for: ", pdb_name); 
	    else
	    {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no naccess data for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
		nonaccess++;
	    }

	    /* Remove temporary file (naccess output files). */
	    ajFileClose(&tempf);
	    
	    ajFmtPrintS(&exec, "rm %S", naccess_str); 
	    ajSysSystem(exec); 

	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".asa");
	    ajFmtPrintS(&exec, "rm %S", naccess_str);
	    ajSysSystem(exec); 

	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".log");
	    ajFmtPrintS(&exec, "rm %S", naccess_str);
	    ajSysSystem(exec); 
	}

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);
	
        
        /* Write AjPPdb object to the output file in clean format. */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {               
	    ajWarn("%s%S\n//\n","Could not write results file for: ", 
                        pdb->Pdb);  
	    
	    ajFmtPrintF(errf,"%s%S\n//\n","Could not write results file for ", 
                        pdb->Pdb);

        }	
	ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */


    ajFmtPrint("STRIDE  failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE  failures: %d\nNACCESS failures: %d\n",
		nostride, nonaccess);
    

    

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);
  
    ajFileClose(&errf);
    if(ajStrGetCharFirst(*mode) != '2')
	ajFileClose(&serrf);
    if(ajStrGetCharFirst(*mode) != '1')
	ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);




    
    ajExit();
    return 0;
} 
コード例 #16
0
int main(int argc, char **argv)
{
    const char *codons[]=
    {
	"TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */
	"TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */
	"GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */
	"AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */
	"ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */
	"CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */
	"TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */
	"ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC"	 /* 56-63 */
    };

    const char *aa=
	"***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY";

    AjPFile inf     = NULL;
    AjPFile outf    = NULL;
    char *entryname = NULL;
    AjPStr fname    = NULL;
    AjPStr key      = NULL;
    AjPStr tmpkey   = NULL;
    AjBool allrecords = AJFALSE;

    AjPTable table  = NULL;
    ajint i = 0;
    ajint j = 0;
    ajint k = 0;
    ajint x = 0;
    ajint savecount[3];

    AjPStr *keyarray = NULL;
    CutgPValues *valarray = NULL;
    AjPCod codon  = NULL;
    ajint sum = 0;
    char c;

    AjPList flist = NULL;
    AjPFile logf = NULL;
    AjPStr  entry = NULL;
    AjPStr  baseentry = NULL;
    AjPStr  wild  = NULL;
    AjPStr division = NULL;
    AjPStr release = NULL;
    AjPStr wildspecies = NULL;
    CutgPValues value = NULL;
    AjPStr docstr = NULL;
    AjPStr species = NULL;
    AjPStr filename = NULL;
    ajint nstops;

    embInit("cutgextract",argc,argv);

    tmpkey = ajStrNew();
    fname  = ajStrNew();


    table = ajTablestrNewLen(TABLE_ESTIMATE);


    flist = ajAcdGetDirlist("directory");
    wild  = ajAcdGetString("wildspec");
    release  = ajAcdGetString("release");
    logf = ajAcdGetOutfile("outfile");
    wildspecies = ajAcdGetString("species");
    filename = ajAcdGetString("filename");
    allrecords = ajAcdGetBoolean("allrecords");

    ajStrInsertC(&release, 0, "CUTG");
    ajStrRemoveWhite(&release);

    while(ajListPop(flist,(void **)&entry))
    {
	ajStrAssignS(&baseentry, entry);
	ajFilenameTrimPath(&baseentry);
	ajDebug("Testing file '%S'\n", entry);
	if(!ajStrMatchWildS(baseentry,wild))
	{
	    ajStrDel(&entry);
	    continue;
	}

	ajDebug("... matched wildcard '%S'\n", wild);
	inf = ajFileNewInNameS(entry);
	if(!inf)
	    ajFatal("cannot open file %S",entry);

	ajFmtPrintS(&division, "%F", inf);
	ajFilenameTrimAll(&division);

	while((entryname = cutgextract_next(inf, wildspecies,
					    &species, &docstr)))
	{
	    if(ajStrGetLen(filename))
		ajStrAssignS(&tmpkey,filename);
	    else
		ajStrAssignC(&tmpkey,entryname);

	    /* See if organism is already in the table */
	    value = ajTableFetch(table,tmpkey);
	    if(!value)			/* Initialise */
	    {
		key = ajStrNewS(tmpkey);
		AJNEW0(value);
		ajStrAssignS(&value->Species,species);
		ajStrAssignS(&value->Division, division);
		ajTablePut(table,(void *)key,(void *)value);
	    }
	    for(k=0;k<3;k++)
		savecount[k] = value->Count[k];
	    nstops = cutgextract_readcodons(inf,allrecords, value->Count);
	    if(nstops < 1)
	    {
		value->Skip++;
		continue;
	    }
	    value->CdsCount++;
	    if(nstops>1)
	    {
		value->CdsCount += (nstops - 1);
		value->Warn++;
		ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'",
		       nstops,
		       value->Count[0] - savecount[0],
		       value->Count[1] - savecount[1],
		       value->Count[2] - savecount[2],
		       cutgextractSavepid);
	    }
	}
	ajStrDel(&entry);
	ajFileClose(&inf);
    }

    ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray);

    i = 0;
    while(keyarray[i])
    {
	key   = keyarray[i];
	value = (CutgPValues) valarray[i++];
	codon = ajCodNew();
	sum   = 0;
	for(j=0;j<CODONS;++j)
	{
	    sum += value->Count[j];
	    x = ajCodIndexC(codons[j]);
	    codon->num[x] = value->Count[j];

	    c = aa[j];
	    if(c=='*')
		codon->aa[x] = 27;
	    else
		codon->aa[x] = c-'A';
	}
	ajCodCalcUsage(codon,sum);

	ajStrAppendC(&key, ".cut");
	if(allrecords)
	{
	    if(value->Warn)
		ajFmtPrintF(logf, "Writing %S CDS: %d Warnings: %d\n",
			    key, value->CdsCount, value->Warn);
	    else
		ajFmtPrintF(logf, "Writing %S CDS: %d\n",
			    key, value->CdsCount);
	}
	else
	{
	    if(value->Skip)
		ajFmtPrintF(logf, "Writing %S CDS: %d Skipped: %d\n",
			    key, value->CdsCount, value->Skip);
	    else
		ajFmtPrintF(logf, "Writing %S CDS: %d\n",
			    key, value->CdsCount);
	}

	ajFmtPrintS(&fname,"CODONS/%S",key);
	outf = ajDatafileNewOutNameS(fname);
	if(!outf)
	    ajFatal("Cannot open output file %S",fname);

	ajCodSetNameS(codon, key);
	ajCodSetSpeciesS(codon, value->Species);
	ajCodSetDivisionS(codon, value->Division);
	ajCodSetReleaseS(codon, release);
	ajCodSetNumcds(codon, value->CdsCount);
	ajCodSetNumcodons(codon, sum);

	ajCodWrite(codon, outf);
	ajFileClose(&outf);


	ajStrDel(&key);
	ajStrDel(&value->Division);
	ajStrDel(&value->Doc);
	ajStrDel(&value->Species);
	AJFREE(value);
	ajCodDel(&codon);
    }

    AJFREE(keyarray);
    AJFREE(valarray);

    ajTableFree(&table);
    ajListFree(&flist);
    ajStrDel(&wild);
    ajStrDel(&release);
    ajStrDel(&wildspecies);
    ajStrDel(&filename);
    ajFileClose(&logf);

    ajStrDel(&cutgextractSavepid);
    ajStrDel(&cutgextractLine);
    ajStrDel(&cutgextractOrg);

    ajStrDel(&fname);
    ajStrDel(&tmpkey);
    ajStrDel(&species);
    ajStrDel(&docstr);
    ajStrDel(&division);
    ajStrDel(&baseentry);

    embExit();

    return 0;
}
コード例 #17
0
ファイル: prosextract.c プロジェクト: WenchaoLin/JAMg
int main(int argc, char **argv)
{
    AjPFile infdat = NULL;
    AjPFile infdoc = NULL;
    AjPFile outf   = NULL;
    AjPFile outs   = NULL;

    AjBool  haspattern;

    const char   *p;


    AjPStr line  = NULL;
    AjPStr text  = NULL;
    AjPStr dirname  = NULL;
    AjPStr filename = NULL;
    AjPStr id    = NULL;
    AjPStr ac    = NULL;
    AjPStr de    = NULL;
    AjPStr pa    = NULL;
    AjPStr ps    = NULL;
    AjPStr fn    = NULL;
    AjPStr re    = NULL;
    AjPStr fname = NULL;
    AjBool flag;
    AjBool isopen;
    AjBool goback;

    ajlong storepos = 0L;


    embInit("prosextract", argc, argv);

    dirname = ajAcdGetDirectoryName("prositedir");

    line = ajStrNew();
    text = ajStrNew();

    id = ajStrNew();
    ac = ajStrNew();
    de = ajStrNew();
    pa = ajStrNew();
    ps = ajStrNew();



    fn=ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.dat");
    if(!(infdat=ajFileNewInNameS(fn)))
	ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);



    fn=ajStrNewC("PROSITE/prosite.lines");
    outf = ajDatafileNewOutNameS(fn);
    ajStrDel(&fn);



    haspattern = ajFalse;

    while(ajReadlineTrim(infdat, &line) )
    {
	if(ajStrPrefixC(line, "ID"))
	{
	    if(ajStrSuffixC(line,"PATTERN."))
	    {
		haspattern = ajTrue;
		/*save id*/
		p = ajStrGetPtr(line);
		p = ajSysFuncStrtok(p," \t;");
		p = ajSysFuncStrtok(NULL," \t;");
		ajStrAssignC(&id,p);
		ajFmtPrintF(outf, "%S ", id);
		continue;
	    }
	    else
	    {
		haspattern = ajFalse;
		continue;
	    }
	}

	if(!haspattern)
	    continue;


	if(ajStrPrefixC(line, "AC") )
	{
	    p = ajStrGetPtr(line);
	    p = ajSysFuncStrtok(p, " \t;");
	    p = ajSysFuncStrtok(NULL, " \t;");
	    ajStrAssignC(&ac,p);
	    ajFmtPrintF(outf, "%S\n ", ac);
	    continue;
	}

    	if(ajStrPrefixC(line, "DE") )
	{
	    p = ajStrGetPtr(line);
	    p = ajSysFuncStrtok(p, " \t.");
	    p = ajSysFuncStrtok(NULL, " \t.");
	    ajStrAssignC(&de,p);
	    ajFmtPrintF(outf, "%S\n ", de);
	    continue;
	}


	if(ajStrPrefixC(line, "PA"))
	{
	    ajStrAssignC(&pa,"");

	    while(ajStrPrefixC(line,"PA"))
	    {
		p = ajStrGetPtr(line);
		p = ajSysFuncStrtok(p, " \t.");
		p = ajSysFuncStrtok(NULL, " \t.");
		ajStrAppendC(&pa,p);
		ajReadlineTrim(infdat, &line);
	    }

	    ajFmtPrintF(outf, "%S\n", pa);
	    re = embPatPrositeToRegExp(pa);
	    ajFmtPrintF(outf, "^%S\n\n", re);
	    ajStrDel(&re);
	    continue;
	}
    }


  /* Finished processing prosite.dat so look at prosite.doc */


    fn = ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.doc");
    if(!(infdoc=ajFileNewInNameS(fn)))
	ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);



    fname  = ajStrNewC("PROSITE/");
    flag   = ajFalse;
    isopen = ajFalse;
    goback = ajFalse;


    while(ajReadlineTrim(infdoc, &text))
    {
	if(ajStrPrefixC(text, "{PS") && isopen && !goback)
	    goback = ajTrue;

	if(ajStrPrefixC(text, "{PS") && !isopen)
	{
	    storepos = ajFileResetPos(infdoc);
	    /* save out the documentation text to acc numbered outfiles . */
	    p = ajStrGetPtr(text)+1;
	    p = ajSysFuncStrtok(p, ";");
	    ajStrAssignS(&filename, fname);
	    ajStrAppendC(&filename, p);

	    outs = ajDatafileNewOutNameS(filename);
	    flag   = ajTrue;
	    isopen = ajTrue;
	    continue;
	}


	if(ajStrPrefixC(text, "{BEGIN}") && flag)
	{
	    while(ajReadlineTrim(infdoc, &text))
	    {
		if(ajStrPrefixC(text,"{END}"))
		    break;

		ajFmtPrintF(outs, "%S\n", text);
	    }
	    ajFileClose(&outs);
	    isopen = ajFalse;

	    if(goback)
	    {
		goback = ajFalse;
		ajFileSeek(infdoc,storepos,0);
	    }

	}
    }

    ajStrDel(&line);
    ajStrDel(&text);
    ajStrDel(&dirname);
    ajStrDel(&filename);

    ajStrDel(&id);
    ajStrDel(&ac);
    ajStrDel(&de);
    ajStrDel(&pa);
    ajStrDel(&re);
    ajStrDel(&ps);
    ajStrDel(&fname);


    ajFileClose(&infdat);
    ajFileClose(&infdoc);
    ajFileClose(&outf);

    embExit();

    return 0;
}
コード例 #18
0
static void jaspextract_copyfiles(AjPStr directory)
{
    AjPStr matrixfile = NULL;
    AjPList flist     = NULL;
    
    AjPStr wild  = NULL;
    AjPStr entry = NULL;
    AjPStr bname = NULL;
    AjPStr line  = NULL;
    AjPStr dest  = NULL;

    const AjPStr datadir = NULL;
    
    ajuint preflen = 0;
    ajuint i       = 0;
    const char *p  = NULL;

    AjPFile inf   = NULL;
    AjPFile outf  = NULL;    
    
    matrixfile = ajStrNew();
    flist      = ajListNew();
    wild       = ajStrNewC("*.pfm");
    bname      = ajStrNew();
    line       = ajStrNew();
    dest       = ajStrNew();


    datadir = ajDatafileValuePath();
    if(!datadir)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");
    
    ajFmtPrintS(&matrixfile,"%S%s",directory,MATRIXFILE);

    if(!ajFilenameExistsRead(matrixfile))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found",directory);
    
    ajFilelistAddPathWild(flist, directory, wild);


    while(ajListPop(flist,(void **)&entry))
    {
        ajStrAssignS(&bname,entry);
        ajFilenameTrimPath(&bname);
        
        i = 0;

        while(Jprefix[i].Prefix)
        {
            if(!ajStrPrefixC(bname,Jprefix[i].Prefix))
            {
                ++i;
                continue;
            }

            preflen = strlen(Jprefix[i].Prefix);
            p = ajStrGetPtr(bname);
            if(p[preflen]>='0' && p[preflen]<='9')
                break;

            ++i;
        }

        if(!Jprefix[i].Prefix)
        {
            ajStrDel(&entry);
            continue;
        }


        ajFmtPrintS(&dest,"%S%s%c%S",datadir,Jprefix[i].Directory,SLASH_CHAR,
                    bname);

        outf = ajFileNewOutNameS(dest);
        if(!outf)
            ajFatal("Cannot open output file %S",dest);

        /* Avoid UNIX copy for portability */
        inf  = ajFileNewInNameS(entry);
        if(!inf)
            ajFatal("Cannot open input file: %S",entry);

        while(ajReadlineTrim(inf,&line))
            ajFmtPrintF(outf,"%S\n",line);

        ajFileClose(&inf);
        ajFileClose(&outf);
        
        ajStrDel(&entry);        

    }
    
    ajListFree(&flist);
    
    ajStrDel(&wild);
    ajStrDel(&dest);
    ajStrDel(&line);
    ajStrDel(&bname);
    ajStrDel(&matrixfile);

    return;
}
コード例 #19
0
/* @funcstatic domainalign_stamp **********************************************
**
** Call STAMP and process files.
**
** @param [r] prevdomain [AjPDomain] Previous domain.
** @param [r] domain [AjPDomain] This domain.
** @param [r] daf [AjPDirout] Domain alignment files.
** @param [r] super [AjPDirout] Superimposition files.
** @param [r] singlets [AjPDirout]  Singlet files.
** @param [r] align [AjPStr]   Align.
** @param [r] alignc [AjPStr] Alignc.
** @param [r] dom [AjPStr]   Dom.
** @param [r] name [AjPStr] Name.
** @param [r] set [AjPStr] Name of set file.
** @param [r] scan [AjPStr] Name of scan file.
** @param [r] sort [AjPStr] Name of sort file.
** @param [r] log [AjPStr] Log file name.
** @param [r] out [AjPStr] Out file name.
** @param [r] keepsinglets [AjBool] Keep singlet sequences or not.
** @param [r] moden [ajint] Mode number.
** @param [r] noden [ajint] Node number.
** @param [r] nset [ajint] Number in set.
** @param [r] logf [AjPFile] Lof file.
** 
**
** @return [void] True on success
** @@
****************************************************************************/
static void domainalign_stamp(AjPDomain prevdomain,
			      AjPDomain domain, 
			      AjPDirout daf, 
			      AjPDirout super,
			      AjPDirout singlets, 
			      AjPStr    align, 
			      AjPStr    alignc, 
			      AjPStr    dom, 
			      AjPStr    name, 
			      AjPStr    set, 
			      AjPStr    scan, 
			      AjPStr    sort, 
			      AjPStr    log, 
			      AjPStr    out, 
			      AjBool    keepsinglets, 
			      ajint     moden, 
			      ajint     noden,
			      ajint     nset, 
			      AjPFile   logf)
{
    AjPStr    exec      = NULL;	/* The UNIX command line to be executed.   */
    AjPFile   clusterf  = NULL;	/* File pointer for log file.              */
    ajint     ncluster  = 0;	/* Counter for the number of clusters.     */
    AjPStr    line      = NULL;	/* Holds a line from the log file.         */
    AjPRegexp rexp      = NULL;	/* For parsing no. of clusters in log file */
    AjPStr    temp      = NULL;	/* A temporary string.                     */
    ajint     x         = 0;    /* Loop counter.                           */
    

    exec     = ajStrNew();
    line     = ajStrNew();
    temp     = ajStrNew();



    rexp     = ajRegCompC("^(Cluster:)");

    ajDebug("domainalign_stamp name: '%S'\n", name);
    
    /* Call STAMP. */
    ajFmtPrintS(&exec,	"%S -l %S -s -n 2 -slide 5 -prefix %S -d %S",
		ajAcdGetpathC("stamp"), dom, name, set);
    ajFmtPrint("\n%S\n\n", exec);
    ajSysExecS(exec);  

    ajFmtPrintS(&exec, "%S -f %S -s Sc 2.5",
		ajAcdGetpathC("sorttrans"), scan);
    ajFmtPrint("\n%S > %S\n\n", exec, sort);

    ajSysExecOutnameS(exec, sort);

    ajFmtPrintS(&exec, "%S -l %S -prefix %S",
		ajAcdGetpathC("stamp"), sort, name);
    ajFmtPrint("\n%S > %S\n\n", exec, log);
    ajSysExecOutnameS(exec, log);
	
    ajFmtPrintS(&exec, "%S -f %S -g  -o %S",
		ajAcdGetpathC("transform"), sort, alignc);
    ajFmtPrint("\n%S\n\n", exec);
    ajSysExecS(exec);
    
    
    /* Count the number of clusters in the log file. */
    if(!(clusterf=ajFileNewInNameS(log)))
	ajFatal("Could not open log file '%S'\n", log);
    ncluster=0;
    while(ajReadlineTrim(clusterf,&line))
	if(ajRegExec(rexp,line))
	    ncluster++;
    ajFileClose(&clusterf);	

    ajDebug("ncluster: %d\n", ncluster);
    
    /* Call STAMP ... calculate two fields for structural equivalence using 
       threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */
    ajFmtPrintS(&exec,"%S -f %S.%d -min 0.5",
		ajAcdGetpathC("poststamp"), name, ncluster);
    ajFmtPrint("%S\n\n", exec);
    ajSysExecS(exec);
    
    
    /* Call STAMP ... convert block format alignment into clustal format. */
    ajFmtPrintS(&exec,"%S -f %S.%d.post",
		ajAcdGetpathC("ver2hor"), name, ncluster); 
    ajFmtPrint("%S > %S\n\n", exec, out);
    ajSysExecOutnameS(exec, out);
    
    
    /* Process STAMP alignment file and generate alignment file for output. */
    domainalign_ProcessStampFile(out, align, prevdomain, noden, logf);
    
    
    /* Remove all temporary files. */
    
    for(x=1;x<ncluster+1;x++)
    {
	ajFmtPrintS(&temp, "%S.%d", name, x);
	ajSysFileUnlinkS(temp); 
    }
    
    ajFmtPrintS(&temp, "%S.%d.post", name, ncluster);
    ajSysFileUnlinkS(temp); 

    ajStrDel(&exec);
    ajStrDel(&line);
    ajStrDel(&temp);
    ajRegFree(&rexp);

    return;
}   
コード例 #20
0
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment** 
** @param [r] logf [AjPFile] Log file
**
** @return [void] True on success
** @@
****************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in, 
					   AjPStr align, 
					   AjPDomain domain,
					   ajint noden, 
					   AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer. */
    AjPFile   inf = NULL;  /* Input file pointer. */
    AjPStr  temp1 = NULL;  /* Temporary string. */
    AjPStr  temp2 = NULL;  /* Temporary string. */
    AjPStr  temp3 = NULL;  /* Temporary string. */
    AjPStr   line = NULL;  /* Line of text from input file. */
    
    
    /* Initialise strings. */
    line    = ajStrNew();
    temp1   = ajStrNew();
    temp2   = ajStrNew();
    temp3   = ajStrNew();



    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");
    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");
    

    /*Write DOMAIN classification records to file*/
    if((domain->Type == ajSCOP))
    {
	ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
	ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
	ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",75," \t\n\r");
	ajFmtPrintF(outf,"# XX\n");
    }
    else
    {
	ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
	ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
	ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75," \t\n\r");
	ajFmtPrintF(outf,"# XX\n");
    }
    
    if((domain->Type == ajSCOP))
    {
	if(noden==1) 
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Class);
	else if(noden==2)
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Fold);
	else if(noden==3)
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Superfamily);
	else if(noden==4) 	
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Scop->Sunid_Family);
	else
	    ajFatal("Node number error in domainalign_ProcessStampFile");
    }	
    else
    {
	    if(noden==5) 
		ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Class_Id);
	    else if(noden==6)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Arch_Id);
	    else if(noden==7)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Topology_Id);
	    else if(noden==8)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Superfamily_Id);
	    else if(noden==9)  
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Family_Id);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
    }   


    
    /* Start of code for reading input file. */
    /*Ignore everything up to first line beginning with 'Number'*/
    while((ajReadlineTrim(inf,&line)))
        /* ajFileReadLine will trim the tailing \n. */
        if((ajStrGetCharPos(line, 1)=='\0'))
            break;

    
    /* Read rest of input file. */
    while((ajReadlineTrim(inf,&line)))
    {
      if((ajStrGetCharPos(line, 1)=='\0'))
        continue; 
        
       /* Print the number line out as it is. */
            else if(ajStrPrefixC(line,"CLUSTAL"))
              continue;
	    else if(ajStrPrefixC(line," "))
                ajFmtPrintF(outf,"\n");
        /* write out a block of protein sequences. */
        else
        {
               /* Read only the 7 characters of the domain identifier code in. */
               ajFmtScanS(line, "%S %S", &temp1,&temp3);
                 ajStrAssignSubS(&temp2, temp1, 0, 6);
  
  
         /* Read the sequence
                   ajStrAssignSubS(&temp3, line, 13, 69);
                   ajStrExchangeSetCC(&temp3, " ", "X");
                   ajStrFmtUpper(&temp3);*/
                
  
                   /* Write domain id code and sequence out. */
                   ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);              
        }
    }
    

    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);
    

    /* All done. */
    return;
}
コード例 #21
0
ファイル: emma.c プロジェクト: WenchaoLin/JAMg
int main(int argc, char **argv)
{

    AjPSeqall seqall = NULL;
    AjPFile dend_outfile = NULL;
    AjPStr tmp_dendfilename = NULL;
    AjPFile tmp_dendfile = NULL;

    AjPStr tmp_aln_outfile = NULL;
    AjPSeqset seqset = NULL;
    AjPSeqout seqout = NULL;
    AjPSeqin  seqin  = NULL;

    AjBool only_dend;
    AjBool are_prot = ajFalse;
    AjBool do_slow;
    AjBool use_dend;
    AjPFile dend_file = NULL;
    AjPStr dend_filename = NULL;

    ajint ktup;
    ajint gapw;
    ajint topdiags;
    ajint window;
    AjBool nopercent;

    AjPStr pw_matrix = NULL;
    AjPStr pw_dna_matrix  = NULL;
    AjPFile pairwise_matrix = NULL;
    float pw_gapc;
    float pw_gapv;

    AjPStr pwmstr = NULL;
    char   pwmc   = '\0';
    AjPStr pwdstr = NULL;
    char   pwdc   = '\0';

    AjPStr m1str = NULL;
    AjPStr m2str = NULL;
    char   m1c   = '\0';
    char   m2c   = '\0';

    AjPStr matrix = NULL;
    AjPStr dna_matrix = NULL;
    AjPFile ma_matrix = NULL;
    float gapc;
    float gapv;
    AjBool endgaps;
    AjBool norgap;
    AjBool nohgap;
    ajint gap_dist;
    ajint maxdiv;
    AjPStr hgapres = NULL;


    AjPSeqout fil_file = NULL;
    AjPSeq seq = NULL;

    AjPStr cmd = NULL;
    AjPStr tmp = NULL;
    AjPStr tmpFilename;
    AjPStr line = NULL;
    ajint nb = 0;


    /* get all the parameters */

    embInit("emma", argc, argv);

    pwmstr = ajStrNew();
    pwdstr = ajStrNew();
    m1str  = ajStrNew();
    m2str  = ajStrNew();


    seqall = ajAcdGetSeqall("sequence");
    seqout = ajAcdGetSeqoutset("outseq");

    dend_outfile = ajAcdGetOutfile("dendoutfile");

    only_dend = ajAcdGetToggle("onlydend");
    use_dend  = ajAcdGetToggle("dendreuse");
    dend_file = ajAcdGetInfile("dendfile");
    if (dend_file)
	ajStrAssignS(&dend_filename, ajFileGetPrintnameS(dend_file));
    ajFileClose(&dend_file);

    do_slow = ajAcdGetToggle("slowalign");

    ktup      = ajAcdGetInt("ktup");
    gapw      = ajAcdGetInt("gapw");
    topdiags  = ajAcdGetInt("topdiags");
    window    = ajAcdGetInt("window");
    nopercent = ajAcdGetBoolean("nopercent");

    pw_matrix = ajAcdGetListSingle("pwmatrix");
    pwmc = ajStrGetCharFirst(pw_matrix);

    if(pwmc=='b')
	ajStrAssignC(&pwmstr,"blosum");
    else if(pwmc=='p')
	ajStrAssignC(&pwmstr,"pam");
    else if(pwmc=='g')
	ajStrAssignC(&pwmstr,"gonnet");
    else if(pwmc=='i')
	ajStrAssignC(&pwmstr,"id");
    else if(pwmc=='o')
	ajStrAssignC(&pwmstr,"own");


    pw_dna_matrix = ajAcdGetListSingle("pwdnamatrix");
    pwdc = ajStrGetCharFirst(pw_dna_matrix);

    if(pwdc=='i')
	ajStrAssignC(&pwdstr,"iub");
    else if(pwdc=='c')
	ajStrAssignC(&pwdstr,"clustalw");
    else if(pwdc=='o')
	ajStrAssignC(&pwdstr,"own");

    pairwise_matrix = ajAcdGetInfile("pairwisedatafile");

    pw_gapc = ajAcdGetFloat( "pwgapopen");
    pw_gapv = ajAcdGetFloat( "pwgapextend");

    matrix = ajAcdGetListSingle( "matrix");
    m1c = ajStrGetCharFirst(matrix);

    if(m1c=='b')
	ajStrAssignC(&m1str,"blosum");
    else if(m1c=='p')
	ajStrAssignC(&m1str,"pam");
    else if(m1c=='g')
	ajStrAssignC(&m1str,"gonnet");
    else if(m1c=='i')
	ajStrAssignC(&m1str,"id");
    else if(m1c=='o')
	ajStrAssignC(&m1str,"own");


    dna_matrix = ajAcdGetListSingle( "dnamatrix");
    m2c = ajStrGetCharFirst(dna_matrix);

    if(m2c=='i')
	ajStrAssignC(&m2str,"iub");
    else if(m2c=='c')
	ajStrAssignC(&m2str,"clustalw");
    else if(m2c=='o')
	ajStrAssignC(&m2str,"own");


    ma_matrix = ajAcdGetInfile("mamatrixfile");
    gapc      = ajAcdGetFloat("gapopen");
    gapv      = ajAcdGetFloat("gapextend");
    endgaps   = ajAcdGetBoolean("endgaps");
    norgap    = ajAcdGetBoolean("norgap");
    nohgap    = ajAcdGetBoolean("nohgap");
    gap_dist  = ajAcdGetInt("gapdist");
    hgapres   = ajAcdGetString("hgapres");
    maxdiv    = ajAcdGetInt("maxdiv");

    tmp = ajStrNewC("fasta");

    /*
    ** Start by writing sequences into a unique temporary file
    ** get file pointer to unique file
    */


    fil_file = ajSeqoutNew();
    tmpFilename = emma_getUniqueFileName();
    if(!ajSeqoutOpenFilename( fil_file, tmpFilename))
	embExitBad();

    /* Set output format to fasta */
    ajSeqoutSetFormatS( fil_file, tmp);

    while(ajSeqallNext(seqall, &seq))
    {
        /*
        **  Check sequences are all of the same type
        **  Still to be done
        **  Write out sequences
        */
	if (!nb)
	    are_prot  = ajSeqIsProt(seq);
        ajSeqoutWriteSeq(fil_file, seq);
	++nb;
    }
    ajSeqoutClose(fil_file);

    if(nb < 2)
	ajFatal("Multiple alignments need at least two sequences");

    /* Generate clustalw command line */
    cmd = ajStrNewS(ajAcdGetpathC("clustalw"));

    /* add tmp file containing sequences */
    ajStrAppendC(&cmd, " -infile=");
    ajStrAppendS(&cmd, tmpFilename);

    /* add out file name */
    tmp_aln_outfile = emma_getUniqueFileName();
    ajStrAppendC(&cmd, " -outfile=");
    ajStrAppendS(&cmd, tmp_aln_outfile);


    /* calculating just the nj tree or doing full alignment */
    if(only_dend)
        ajStrAppendC(&cmd, " -tree");
    else
        if(!use_dend)
	    ajStrAppendC(&cmd, " -align");

    /* Set sequence type from information from acd file */
    if(are_prot)
        ajStrAppendC(&cmd, " -type=protein");
    else
        ajStrAppendC(&cmd, " -type=dna");


    /*
    **  set output to MSF format - will read in this file later and output
    **  user requested format
    */
    ajStrAppendC(&cmd, " -output=");
    ajStrAppendC(&cmd, "gcg");

    /* If going to do pairwise alignment */
    if(!use_dend)
    {
        /* add fast pairwise alignments*/
        if(!do_slow)
        {
            ajStrAppendC(&cmd, " -quicktree");
            ajStrAppendC(&cmd, " -ktuple=");
            ajStrFromInt(&tmp, ktup);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -window=");
            ajStrFromInt(&tmp, window);
            ajStrAppendS(&cmd, tmp);
            if(nopercent)
                ajStrAppendC(&cmd, " -score=percent");
            else
                ajStrAppendC(&cmd, " -score=absolute");
            ajStrAppendC(&cmd, " -topdiags=");
            ajStrFromInt(&tmp, topdiags);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pairgap=");
            ajStrFromInt(&tmp, gapw);
            ajStrAppendS(&cmd, tmp);
        }
        else
        {
            if(pairwise_matrix)
            {
		if(are_prot)
		    ajStrAppendC(&cmd, " -pwmatrix=");
		else
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		ajStrAppendS(&cmd, ajFileGetPrintnameS(pairwise_matrix));
            }
            else
            {
		if(are_prot)
		{
		    ajStrAppendC(&cmd, " -pwmatrix=");
		    ajStrAppendS(&cmd, pwmstr);
		}
		else
		{
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		    ajStrAppendS(&cmd, pwdstr);
		}
            }
            ajStrAppendC(&cmd, " -pwgapopen=");
            ajStrFromFloat(&tmp, pw_gapc, 3);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pwgapext=");
            ajStrFromFloat(&tmp, pw_gapv, 3);
            ajStrAppendS(&cmd, tmp);
        }
    }

    /* Multiple alignments */

    /* using existing tree or generating new tree? */
    if(use_dend)
    {
        ajStrAppendC(&cmd, " -usetree=");
        ajStrAppendS(&cmd, dend_filename);
    }
    else
    {
	/* use tmp file to hold dend file, will read back in later */
	tmp_dendfilename = emma_getUniqueFileName();
        ajStrAppendC(&cmd, " -newtree=");
        ajStrAppendS(&cmd, tmp_dendfilename);
    }

    if(ma_matrix)
    {
	if(are_prot)
	    ajStrAppendC(&cmd, " -matrix=");
	else
	    ajStrAppendC(&cmd, " -pwmatrix=");
	ajStrAppendS(&cmd, ajFileGetPrintnameS(ma_matrix));
    }
    else
    {
	if(are_prot)
	{
	    ajStrAppendC(&cmd, " -matrix=");
	    ajStrAppendS(&cmd, m1str);
	}
	else
	{
	    ajStrAppendC(&cmd, " -dnamatrix=");
	    ajStrAppendS(&cmd, m2str);
	}
    }

    ajStrAppendC(&cmd, " -gapopen=");
    ajStrFromFloat(&tmp, gapc, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapext=");
    ajStrFromFloat(&tmp, gapv, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapdist=");
    ajStrFromInt(&tmp, gap_dist);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -hgapresidues=");
    ajStrAppendS(&cmd, hgapres);

    if(!endgaps)
	ajStrAppendC(&cmd, " -endgaps");

    if(norgap)
	ajStrAppendC(&cmd, " -nopgap");

    if(nohgap)
	ajStrAppendC(&cmd, " -nohgap");

    ajStrAppendC(&cmd, " -maxdiv=");
    ajStrFromInt(&tmp, maxdiv);
    ajStrAppendS(&cmd, tmp);


    /*  run clustalw */

/*    ajFmtError("..%s..\n\n", ajStrGetPtr( cmd)); */
    ajDebug("Executing '%S'\n", cmd);

    ajSysExecS(cmd);

    /* produce alignment file only if one was produced */
    if(!only_dend)
    {
	/* read in tmp alignment output file to output through EMBOSS output */

	seqin = ajSeqinNew();
	/*
	**  add the Usa format to the start of the filename to tell EMBOSS
	**  format of file
	*/
	ajStrInsertC(&tmp_aln_outfile, 0, "msf::");
	ajSeqinUsa(&seqin, tmp_aln_outfile);
	seqset = ajSeqsetNew();
	if(ajSeqsetRead(seqset, seqin))
	{
	    ajSeqoutWriteSet(seqout, seqset);


	    ajSeqoutClose(seqout);
	    ajSeqinDel(&seqin);

	    /* remove the Usa from the start of the string */
	    ajStrCutStart(&tmp_aln_outfile, 5);
	}
	else
	    ajFmtError("Problem writing out EMBOSS alignment file\n");
    }


    /* read in new tmp dend file (if produced) to output through EMBOSS */
    if(tmp_dendfilename!=NULL)
    {
	tmp_dendfile = ajFileNewInNameS( tmp_dendfilename);

	if(tmp_dendfile!=NULL){
	while(ajReadlineTrim(tmp_dendfile, &line))
	    ajFmtPrintF(dend_outfile, "%s\n", ajStrGetPtr( line));

	ajFileClose(&tmp_dendfile);
	ajSysFileUnlinkS(tmp_dendfilename);
    }
    }


    ajSysFileUnlinkS(tmpFilename);

    if(!only_dend)
	ajSysFileUnlinkS(tmp_aln_outfile);

    ajStrDel(&pw_matrix);
    ajStrDel(&matrix);
    ajStrDel(&pw_dna_matrix);
    ajStrDel(&dna_matrix);
    ajStrDel(&tmp_dendfilename);
    ajStrDel(&dend_filename);
    ajStrDel(&tmp_aln_outfile);
    ajStrDel(&pwmstr);
    ajStrDel(&pwdstr);
    ajStrDel(&m1str);
    ajStrDel(&m2str);
    ajStrDel(&hgapres);
    ajStrDel(&cmd);
    ajStrDel(&tmp);
    ajStrDel(&tmpFilename);
    ajStrDel(&line);

    ajFileClose(&dend_outfile);
    ajFileClose(&tmp_dendfile);
    ajFileClose(&dend_file);
    ajFileClose(&pairwise_matrix);
    ajFileClose(&ma_matrix);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);
    ajSeqoutDel(&fil_file);
    ajSeqinDel(&seqin);

    embExit();

    return 0;
}
コード例 #22
0
static void domainalign_ProcessStampFile(AjPStr in, 
					 AjPStr out,
					 AjPDomain domain, 
					 ajint noden, 
					 AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer.          */
    AjPFile   inf = NULL;  /* Input file pointer.           */
    AjPStr  temp1 = NULL;  /* Temporary string.             */
    AjPStr  temp2 = NULL;  /* Temporary string.             */
    AjPStr  temp3 = NULL;  /* Temporary string.             */
    AjPStr   line = NULL;  /* Line of text from input file. */
    ajint     blk = 1;     /* Count of the current block in the input file.
			      Block 1 is the numbering and protein sequences, 
			      Block 2 is the secondary structure, 
			      Block 3 is the Very/Less/Post similar records*/
    AjBool     ok = ajFalse;
    
    
    /* Initialise strings. */
    line    = ajStrNew();
    temp1    = ajStrNew();
    temp2    = ajStrNew();
    temp3    = ajStrNew();


    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
	ajFatal("Could not open input file in domainalign_ProcessStampFile");
    



    /* Start of code for reading input file. 
       Ignore everything up to first line beginning with 'Number'. */
    while((ajReadlineTrim(inf,&line)))
    {
	/* ajFileReadLine will trim the tailing \n. */
	if((ajStrGetCharPos(line, 1)=='\0'))
	{
	    ok = ajTrue;
	    break;
	}
    }
    
    
    
    /* Read rest of input file. */
    if(ok)
    {
	/* Write DOMAIN classification records to file. */
	if(!(outf=ajFileNewOutNameS(out)))
	 ajFatal("Could not open output file in domainalign_ProcessStampFile");

	
	if((domain->Type == ajSCOP))
	{
	    ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
	    ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
	    ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",
			    75," \t\n\r");
	    ajFmtPrintF(outf,"# XX\n");
	}
	else
	{
	    ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
	    ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
	    ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75,
			    " \t\n\r");
	    ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75,
			    " \t\n\r");
	    ajFmtPrintF(outf,"# XX\n");
	}
	if((domain->Type == ajSCOP))
	{
	    if(noden==1) 
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Scop->Sunid_Class);
	    else if(noden==2)
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Scop->Sunid_Fold);
	    else if(noden==3)
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Superfamily);
	    else if(noden==4) 	
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Family);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
	}
	else
	{
	    if(noden==5) 
		ajFmtPrintF(outf,"# SI   %d\n# XX", domain->Cath->Class_Id);
	    else if(noden==6)
		ajFmtPrintF(outf,"# SI   %d\n# XX", domain->Cath->Arch_Id);
	    else if(noden==7)
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Cath->Topology_Id);
	    else if(noden==8)
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Cath->Superfamily_Id);
	    else if(noden==9)  
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Cath->Family_Id);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
	}   



	while((ajReadlineTrim(inf,&line)))
	{
	    /* Increment counter for block of file. */
	    if((ajStrGetCharPos(line, 1)=='\0'))
	    {
		blk++;
		if(blk==4)
		    blk=1;
	    
		continue;
	    }



	    /* Block of numbering line and protein sequences. */
	    if(blk==1)
	    {
		/* Print the number line out as it is. */
		if(ajStrPrefixC(line,"Number"))
		    ajFmtPrintF(outf,"\n# %7s %S\n"," ", line);
		else
		{
		    /* Read only the 7 characters
		       of the domain identifier
		       code in. */
		    ajFmtScanS(line, "%S", &temp1);
		    ajStrAssignSubS(&temp2, temp1, 0, 6);


		    /* Read the sequence. */
		    ajStrAssignSubS(&temp3, line, 13, 69);
		    ajStrExchangeSetCC(&temp3, " ", "X");
		    ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
				"with 'X'\n");
		    ajStrFmtUpper(&temp3);
		

		    /* Write domain id code and sequence out. */
		    ajFmtPrintF(outf,"%-15S%7d %S%7d\n",
				temp2, 0, temp3, 0);
		}
	    }
	    /* Secondary structure filled with '????' (unwanted). */
	    else if(blk==2)
	    {
		continue;
	    }
	    /* Similarity lines. */
	    else
	    {
		if(ajStrPrefixC(line,"Post"))
		{
		    /* Read the sequence. */
		    ajStrAssignSubS(&temp3, line, 13, 69);

		    /* Write post similar line out. */
		    ajFmtPrintF(outf,"%-15s%7s %S\n","# Post_similar", " ",
				temp3);
		}
		/* Ignore Very and Less similar lines. */
		else continue;
	    }
	}
    }
    else /* ok == ajFalse. */
    {
	ajWarn("\n***********************************************\n"
	       "* STAMP was called but output file was EMPTY! *\n"
	       "*   NO OUTPUT FILE GENERATED FOR THIS NODE.   *\n"
	       "***********************************************\n");
	ajFmtPrintF(logf, "STAMP called but output file empty.  "
		    "No output file for this node!");
    }
    


    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);
    

    /* All done. */
    return;
}
コード例 #23
0
AjBool ajDebugTest(const char* token)
{
    AjPStr filename = NULL;
    const char* debugtestname = ".debugtest";
    char* ctoken = NULL;
    AjPStr line = NULL;
    AjPStr strtoken = NULL;
    AjPStr rest = NULL;
    static ajint depth    = 0;

    struct 
    {
        ajuint count;
        ajuint max;
    } *stats;
    
    if(depth)
        return ajFalse;

    depth++;

    if(!messDebugTestInit)
    {
        filename = ajStrNewC(debugtestname);

        if(ajFilenameExists(filename))
        {
            messDebugTestFile = ajFileNewInNameS(filename);
        }
        else
        {
            ajFmtPrintS(&filename, "%s%s%s",
                        getenv("HOME"), SLASH_STRING, debugtestname); 
            if(ajFilenameExists(filename))
                messDebugTestFile = ajFileNewInNameS(filename);
        }
        ajStrDel(&filename);

        if(messDebugTestFile) 
        {
            messDebugTestTable = ajTablecharNewLen(256);

            while(ajReadlineTrim(messDebugTestFile, &line))
            {
                if(ajStrExtractFirst(line, &rest, &strtoken))
                {
                    AJNEW0(stats);
                    ctoken = ajCharNewS(strtoken);
                    if(ajStrIsInt(rest))
                        ajStrToUint(rest, &stats->max);
                    else
                        stats->max = UINT_MAX;
                    ajTablePut(messDebugTestTable, ctoken, stats);
                    ctoken = NULL;
                    stats = NULL;
                }
            }

            ajStrDel(&line);
            ajStrDel(&strtoken);
            ajStrDel(&rest);
            ajFileClose(&messDebugTestFile);
        }
        messDebugTestInit = ajTrue;
     }

    depth--;
    
    if(!messDebugTestTable)
        return ajFalse;

    depth++;
    stats = ajTableFetch(messDebugTestTable, token);
    depth--;
    

    if(!stats)
        return ajFalse;

    if(!stats->max)
        return ajTrue;

    if(stats->count++ >= stats->max)
        return ajFalse;

    return ajTrue;
}
コード例 #24
0
static void jaspscan_ParseInput(const AjPStr dir, const AjPStr jaspdir,
				const AjPStr mats, const AjPStr excl,
				ajuint *recurs, AjPList ret)
{
    ajuint nm = 0;
    ajuint ne = 0;
    AjPStr *carr = NULL;
    AjPStr *earr = NULL;
    AjPFile inf = NULL;
    AjPStr line = NULL;
    AjPStr comm = NULL;
    AjPStr val  = NULL;
    
    ajuint i;
    ajuint j;
    
    char c;

    ajuint rlen = 0;
    

    if(*recurs > JASPSCAN_RECURS)
	ajFatal("Too many recursion levels in matrix list files");
    
    line = ajStrNew();
    comm = ajStrNew();
    

    if(mats)
    {
	nm = ajArrCommaList(mats,&carr);	
	for(i=0; i < nm; ++i)
	{
	    if(ajStrGetCharFirst(carr[i]) != '@')
		ajStrFmtUpper(&carr[i]);
	
	    if(ajStrMatchC(carr[i],"ALL"))
	    {
		jaspscan_GetFileList(dir, jaspdir, "*", ret);
		ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel);
	    }
	    else if(ajStrGetCharFirst(carr[i]) == '@')
	    {
		ajStrTrimStartC(&carr[i],"@");
		inf = ajFileNewInNameS(carr[i]);
		if(!inf)
		    ajFatal("Cannot open list file %S",carr[i]);

		while(ajReadlineTrim(inf,&line))
		{
		    ajStrRemoveWhite(&line);
		    c = ajStrGetCharFirst(line);
		    if(c == '#' || c== '!')
			continue;
		    if(ajStrGetLen(comm))
			ajStrAppendC(&comm,",");
		    ajStrFmtUpper(&line);
		    ajStrAppendS(&comm,line);
		}

		*recurs += 1;
		jaspscan_ParseInput(dir,jaspdir,comm,NULL,recurs,ret);
		*recurs -= 1;
		ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel);	    

		ajFileClose(&inf);
	    }
	    else
	    {
		jaspscan_GetFileList(dir,jaspdir,ajStrGetPtr(carr[i]),ret);
		ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel);
	    }
	}

	for(i=0; i < nm; ++i)
	    ajStrDel(&carr[i]);

	AJFREE(carr);
    }
    
    


    if(excl)
    {
	ne = ajArrCommaList(excl,&earr);
	
	for(i=0; i < ne; ++i)
	{
	    if(ajStrGetCharFirst(earr[i]) != '@')
		ajStrFmtUpper(&earr[i]);
	
	    if(ajStrGetCharFirst(earr[i]) == '@')
	    {
		ajStrTrimStartC(&earr[i],"@");
		inf = ajFileNewInNameS(earr[i]);
		if(!inf)
		    ajFatal("Cannot open list file %S",earr[i]);

		while(ajReadlineTrim(inf,&line))
		{
		    ajStrRemoveWhite(&line);
		    c = ajStrGetCharFirst(line);
		    if(c == '#' || c== '!')
			continue;
		    if(ajStrGetLen(comm))
			ajStrAppendC(&comm,",");
		    ajStrFmtUpper(&line);
		    ajStrAppendS(&comm,line);
		}

		*recurs += 1;
		jaspscan_ParseInput(dir,jaspdir,NULL,comm,recurs,ret);
		*recurs -= 1;
		ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel);	    

		ajFileClose(&inf);
	    }
	    else
	    {
		ajStrAssignS(&line,earr[i]);
		ajStrAppendC(&line,J_EXT);
		rlen = ajListGetLength(ret);
		for(j=0; j < rlen; ++j)
		{
		    ajListPop(ret,(void **)&val);
		    if(ajStrSuffixS(val,line))
			ajStrDel(&val);
		    else
			ajListPushAppend(ret,(void *)val);
		}
		
	    }
	} 


	for(i=0; i < ne; ++i)
	    ajStrDel(&earr[i]);
	AJFREE(earr);
    }
    

    ajStrDel(&line);
    ajStrDel(&comm);

    return;
}
コード例 #25
0
int main(int argc, char **argv) {

  embInitPV("kdnapenny", argc, argv, "KBWS", "1.0.8");

  struct soap soap;
  char* jobid;
  char* result;

  AjPSeqall  seqall;
  AjPSeq     seq;
  AjPFile    outf;
  AjPStr     substr;
  AjPStr     inseq = NULL;

  seqall = ajAcdGetSeqall("seqall");
  outf   = ajAcdGetOutfile("outfile");

  AjPStr    tmp         = NULL;
  AjPStr    tmpFileName = NULL;
  AjPSeqout fil_file;
  AjPStr    line        = NULL;
  AjPStr    sizestr     = NULL;
  ajint     thissize    = 0;
  ajint     nb          = 0;
  AjBool    are_prot    = ajFalse;
  ajint     size        = 0;
  AjPFile   infile;

  tmp = ajStrNewC("fasta");

  fil_file    = ajSeqoutNew();
  tmpFileName = getUniqueFileName();

  if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) {
    embExitBad();
  }

  ajSeqoutSetFormatS(fil_file, tmp);

  while (ajSeqallNext(seqall, &seq)) {
    if (!nb) {
      are_prot  = ajSeqIsProt(seq);
    }
    ajSeqoutWriteSeq(fil_file, seq);
    ++nb;
  }
  ajSeqoutClose(fil_file);
  ajSeqoutDel(&fil_file);

  if (nb < 2) {
    ajFatal("Multiple alignments need at least two sequences");
  }

  infile = ajFileNewInNameS(tmpFileName);

  while (ajReadline(infile, &line)) {
    ajStrAppendS(&inseq,line);
    ajStrAppendC(&inseq,"\n");
  }

  soap_init(&soap);

  char* in0;
  in0 = ajCharNewS(inseq);
  if ( soap_call_ns1__runDnapenny( &soap, NULL, NULL, in0, &jobid ) == SOAP_OK ) {
    fprintf(stderr,"Jobid: %s\n",jobid);
  } else {
    soap_print_fault(&soap, stderr);
  }

  int check = 0;
  while ( check == 0 ) {
    if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid,  &check ) == SOAP_OK ) {
      fprintf(stderr,"*");
    } else {
      soap_print_fault(&soap, stderr);
    }
    sleep(3);
  }
  fprintf(stderr, "\n");

  if ( soap_call_ns1__getResult( &soap, NULL, NULL, jobid,  &result ) == SOAP_OK ) {
    substr = ajStrNewC(result);
    ajFmtPrintF(outf,"%S\n",substr);
  } else {
    soap_print_fault(&soap, stderr);
  }

  ajSysFileUnlinkS(tmpFileName);

  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

  ajFileClose(&outf);
  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&substr);

  embExit();

  return 0;
}
コード例 #26
0
/* @prog domainreso ***********************************************************
**
** Removes low resolution domains from a DCF file (domain
** classification file).
**
******************************************************************************/
int main(ajint argc, char **argv)
{

    AjPList     cpdb_path  = NULL; /* Location of coordinate files for input */
    AjPStr      cpdb_name  = NULL; /* Name of coordinate file                */
    AjPStr      temp       = NULL; /* temp string                            */
    AjPStr      temp2      = NULL; /* temp string                            */
    AjPList     entry      = NULL; /* List of pdb codes with resolution      */
                                   /* ABOVE the threshold                    */
    AjPStr     *entryarr   = NULL; /* entry as an array                      */
    

    AjPFile     fptr_cpdb  = NULL; /* Pointer to current coordinate file     */
    AjPFile     dcfin      = NULL; /* DCF input file                         */
    AjPFile     dcfout     = NULL; /* DCF output file                        */

    AjPPdb      pdb        = NULL; /* Pdb object pointer                     */    
    AjPDomain   domain     = NULL; /* Domain structure                       */
 

    float       threshold  = 0.0;  /* Resolution threshold                   */
    ajint       num        = 0;    /* number of nodes in list                */

    ajint       type       = 0;    /* Type of domain (ajSCOP or ajCATH) in 
				      the DCF file                           */


    
    
                                           
    /* Read data from acd */
    embInitPV("domainreso",argc,argv,"DOMAINATRIX",VERSION);
    cpdb_path     = ajAcdGetDirlist("cpdbpath");    
    threshold     = ajAcdGetFloat("threshold");
    dcfin         = ajAcdGetInfile("dcfinfile");
    dcfout        = ajAcdGetOutfile("dcfoutfile");




    
    /* Allocate strings etc. */
    cpdb_name     = ajStrNew();
    temp          = ajStrNew();

    /* Create list . */
    entry    = ajListNew();


    /* Create list of files in CPDB directory. */
    

    /* Determine number of nodes on list    */
    num = ajListGetLength(cpdb_path);



/*
 domainreso reads a directory of clean coordinate files file, creates a list 
 of the files, then reads every list entry and extracts the resolution of the 
 structure.  If the value is less than a threshold (user defined) then the 
 domain identifier is pushed onto a list.  The DCF file (domain classification 
 file) is then read and domain identifiers compared to those on the list, if 
 found then the domain structure data is written the new DCF file.    
*/


    type = ajDomainDCFType(dcfin);


    /* Start of main application loop                         */
    /* Produce list of pdb codes with resolution              */
    /* ABOVE the threshold.                                   */
    while(ajListPop(cpdb_path,(void **)&temp))
    {
        /* Open coordinate file. */
        if((fptr_cpdb=ajFileNewInNameS(temp))==NULL)
        {
	    ajWarn("Could not open cpdb file");
            ajStrDel(&temp);
            continue;       
        }
	ajFmtPrint("%S\n", temp);
	fflush(stdout);
	
	

        /* Read coordinate data file. */ 
	pdb = ajPdbReadFirstModelNew(fptr_cpdb);
        

        /* Check if resolution is above threshold. */
        if(pdb->Reso > threshold)
	{
	    /* assign ID to list. */
	    temp2=ajStrNew();
	    ajStrAssignS(&temp2, pdb->Pdb);
	    ajListPush(entry, (AjPStr) temp2);
	}        
	
        /* Close coordinate file and tidy up*/
        ajPdbDel(&pdb);
        ajFileClose(&fptr_cpdb);
	ajStrDel(&temp);
    }
    num = ajListGetLength(entry);
    

    /* Sort the list of pdb codes & convert to an array. */
    ajListSort(entry, domainreso_StrComp);
    ajListToarray(entry, (void ***)&entryarr);
    
    
    /* Read DCF file and compare IDs to those in list          
     if not present then write domain structure data to output. . */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
	/* DOMAIN id not found in the list of domains with resolution 
	   above the threshold, so include it in the output file. */
	if((domainreso_StrBinSearchDomain(ajDomainGetId(domain), 
					  entryarr, num))==-1)
	    ajDomainWrite(dcfout, domain);

        /* Delete domain structure. */
        ajDomainDel(&domain);
    }


    /* Tidy up. */
    ajStrDel(&temp2);

    ajStrDel(&cpdb_name);
    ajFileClose(&dcfout);
    ajFileClose(&dcfin);
    ajListFree(&cpdb_path);
    ajListFree(&entry);
    AJFREE(entryarr);
    
  
    /* Return. */
    ajExit();
    return 0;
}
コード例 #27
0
ファイル: genret.c プロジェクト: ktnyt/GEMBASSY
int main(int argc, char *argv[])
{
  embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3");

  AjPSeqall seqall;
  AjPSeq seq      = NULL;
  AjPStr inseq    = NULL;
  AjPStr gene     = NULL;
  AjPStr access   = NULL;
  AjBool accid    = ajTrue;
  AjPStr argument = NULL;
  AjPFile outfile = NULL;

  AjPStr seqid  = NULL;
  AjPStr restid = NULL;

  AjBool valid = ajFalse;
  AjBool isseq = ajFalse;
  AjBool isgbk = ajFalse;

  AjPFilebuff buff = NULL;
  AjPFile  tmpfile = NULL;
  AjPStr   tmpname = NULL;

  AjPStr regexstr = NULL;
  AjPStrTok token = NULL;
  AjPRegexp regex = NULL;

  AjPStr url  = NULL;
  AjPStr base = NULL;
  AjPStr head = NULL;
  AjPStr line = NULL;

  seqall   = ajAcdGetSeqall("sequence");
  access   = ajAcdGetString("access");
  gene     = ajAcdGetString("gene");
  argument = ajAcdGetString("argument");
  accid    = ajAcdGetBoolean("accid");
  outfile  = ajAcdGetOutfile("outfile");

  if(
     ajStrMatchC(access, "translation") ||
     ajStrMatchC(access, "get_exon") ||
     ajStrMatchC(access, "get_exons") ||
     ajStrMatchC(access, "get_cdsseq") ||
     ajStrMatchC(access, "get_gbkseq") ||
     ajStrMatchC(access, "get_geneseq") ||
     ajStrMatchC(access, "get_intron") ||
     ajStrMatchC(access, "getseq") ||
     ajStrMatchC(access, "seq") ||
     ajStrMatchC(access, "around_startcodon") ||
     ajStrMatchC(access, "around_stopcodon") ||
     ajStrMatchC(access, "before_startcodon") ||
     ajStrMatchC(access, "before_stopcodon") ||
     ajStrMatchC(access, "after_startcodon") ||
     ajStrMatchC(access, "after_stopcodon")
     )
    {
      isseq = ajTrue;
    }
  else if(ajStrMatchC(access, "annotate") ||
          ajStrMatchC(access, "output"))
    {
      isgbk = ajTrue;
    }
  else
    {
      ajFmtPrintF(outfile, "gene,%S\n", access);
    }

  base = ajStrNewC("rest.g-language.org");

  ajStrExchangeCC(&argument, " ", "/");
  ajStrExchangeCC(&argument, ",", "/");
  ajStrExchangeCC(&argument, "\t", "/");
  ajStrExchangeCC(&argument, "\r", "/");
  ajStrExchangeCC(&argument, "\n", "/");

  if(ajStrMatchC(gene, "*"))
    {
      ajStrInsertK(&gene, 0, '.');
    }

  if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::"))
    {
      ajStrExchangeCC(&gene, "@", "");
      ajStrExchangeCC(&gene, "list::", "");
      ajStrAssignS(&tmpname, gene);

      tmpfile = ajFileNewInNameS(tmpname);

      if(!tmpfile)
        {
          ajDie("List file (%S) open error\n", tmpname);
        }

      gene = ajStrNew();

      while(ajReadline(tmpfile, &line))
        {
          ajStrAppendS(&gene, line);
        }

      ajFileClose(&tmpfile);
      ajStrDel(&tmpname);
      ajStrDel(&line);
    }

  tmpname = ajStrNew();
  gAssignUniqueName(&tmpname);

  while(ajSeqallNext(seqall, &seq))
    {
      inseq = ajStrNew();

      if(!accid)
        {
          if(gFormatGenbank(seq, &inseq))
            {
              tmpfile = ajFileNewOutNameS(tmpname);

              if(!tmpfile)
                {
                  ajDie("Output file (%S) open error\n", tmpname);
                }

              ajFmtPrintF(tmpfile, "%S", inseq);

              ajFileClose(&tmpfile);

              ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);

              gFilePostSS(url, tmpname, &restid);

              ajStrDel(&url);

              ajSysFileUnlinkS(tmpname);
            }
          else
            {
              ajWarn("Sequence does not have features\n"
                     "Proceeding with sequence accession ID\n");
              accid = ajTrue;
            }
        }


      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      if(ajStrGetLen(seqid) == 0)
        {
          ajStrAssignS(&seqid, ajSeqGetNameS(seq));
        }

      if(ajStrGetLen(seqid) == 0)
        {
          ajWarn("No valid header information\n");
        }

      if(accid)
        {
          ajStrAssignS(&restid, seqid);
          if(ajStrGetLen(seqid) == 0)
            {
              ajDie("Cannot proceed without header with -accid\n");
            }

          if(!gValID(seqid))
            {
              ajDie("Invalid accession ID:%S, exiting\n", seqid);
            }
        }

      url = ajStrNew();

      if(isgbk)
        {
          ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access);
        }
      else
        {
          ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument);
        }

      if(!gFilebuffURLS(url, &buff))
        {
          ajDie("GET error from %S\n", url);
        }

      while(ajBuffreadLine(buff, &line))
        {
          if(isgbk){
            ajFmtPrintF(outfile, "%S", line);
            continue;
          }

          ajStrRemoveLastNewline(&line);

          regex = ajRegCompC("^>");

          if(ajRegExec(regex, line))
            {
              head = ajStrNew();

              ajStrAssignS(&head, line);
              ajStrTrimStartC(&head, ">");

              valid = ajFalse;

              token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n");

              while(ajStrTokenNextParse(token, &regexstr))
                {
                  if(ajStrGetLen(regexstr))
                    {
                      regex = ajRegComp(regexstr);

                      if(ajRegExec(regex, line))
                        {
                          valid = ajTrue;
                          if(ajStrIsAlnum(regexstr))
                            {
                              ajStrExchangeSC(&gene, regexstr, "");
                            }
                        }

                      ajRegFree(&regex);
                    }
                }
            }
          else
            {
              if(valid)
                {
                  if(isseq)
                    {
                      ajStrFmtWrap(&line, 60);
                      ajFmtPrintF(outfile, ">%S\n%S\n", head, line);
                    }
                  else
                    {
                      ajFmtPrintF(outfile, "%S,%S\n", head, line);
                    }

                  valid = ajFalse;
                }
            }
        }

      ajFileClose(&outfile);

      ajStrDel(&restid);
      ajStrDel(&seqid);
      ajStrDel(&inseq);
    }

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&access);
  ajStrDel(&gene);

  embExit();
}
コード例 #28
0
int main(int argc, char **argv) {

  embInitPV("kclustalw", argc, argv, "KBWS", "1.0.8");

  struct soap soap;
  struct ns1__clustalwInputParams params;
  char* jobid;
  char* result;

  AjPSeqall  seqall;
  AjPSeq     seq;
  AjPFile    outf;
  AjPFile    outf_dnd;
  AjPStr     substr;
  AjPStr     inseq = NULL;
  AjPStr     alignment;
  AjPStr     output;
  AjPStr     matrix;
  AjPStr     outorder;
  ajint      ktup;
  ajint      window;
  ajint      gapopen;
  float      gapext;
  ajint      gapdist;
  AjBool     endgaps;
  ajint      pairgap;
  ajint      topdiags;
  AjPStr     score;
  AjBool     tossgaps;
  AjBool     kimura;
  AjPStr     outputtree;
  AjBool     tree;
  AjBool     quicktree;
  AjBool     align;
  AjPStr     clustering;
  ajint      numiter;
  AjPStr     iteration;
  alignment  = ajAcdGetString("alignment");
  output     = ajAcdGetString("output");
  matrix     = ajAcdGetString("matrix");
  outorder   = ajAcdGetString("outorder");
  ktup       = ajAcdGetInt("ktup");
  window     = ajAcdGetInt("window");
  gapopen    = ajAcdGetInt("gapopen");
  gapext     = ajAcdGetFloat("gapext");
  gapdist    = ajAcdGetInt("gapdist");
  endgaps    = ajAcdGetBoolean("endgaps");
  pairgap    = ajAcdGetInt("pairgap");
  topdiags   = ajAcdGetInt("topdiags");
  score      = ajAcdGetString("score");
  tossgaps   = ajAcdGetBoolean("tossgaps");
  kimura     = ajAcdGetBoolean("kimura");
  outputtree = ajAcdGetString("outputtree");
  tree       = ajAcdGetBoolean("tree");
  quicktree  = ajAcdGetBoolean("quicktree");
  align      = ajAcdGetBoolean("align");
  clustering = ajAcdGetString("clustering");
  numiter    = ajAcdGetInt("numiter");
  iteration  = ajAcdGetString("iteration");

  seqall = ajAcdGetSeqall("seqall");
  outf   = ajAcdGetOutfile("outfile");
  outf_dnd = ajAcdGetOutfile("dndoutfile");
  params.alignment = ajCharNewS(alignment);
  params.output = ajCharNewS(output);
  params.matrix = ajCharNewS(matrix);
  params.outorder = ajCharNewS(outorder);
  params.ktup = ktup;
  params.window = window;
  params.gapopen = gapopen;
  params.gapext = gapext;
  params.gapdist = gapdist;
  if (endgaps) {
    params.endgaps = xsd__boolean__true_;
  } else {
    params.endgaps = xsd__boolean__false_;
  }
  params.pairgap = pairgap;
  params.topdiags = topdiags;
  params.score = ajCharNewS(score);

  if (tossgaps) {
    params.tossgaps = xsd__boolean__true_;
  } else {
    params.tossgaps = xsd__boolean__false_;
  }
  if (kimura) {
    params.kimura = xsd__boolean__true_;
  } else {
    params.kimura = xsd__boolean__false_;
  }
  params.outputtree = ajCharNewS(outputtree);
  if (tree) {
    params.tree = xsd__boolean__true_;
  } else {
    params.tree = xsd__boolean__false_;
  }
  if (quicktree) {
    params.quicktree = xsd__boolean__true_;
  } else {
    params.quicktree = xsd__boolean__false_;
  }
  if (align) {
    params.align = xsd__boolean__true_;
  } else {
    params.align = xsd__boolean__false_;
  }
  params.clustering = ajCharNewS(clustering);
  params.numiter = numiter;
  params.iteration = ajCharNewS(iteration);

  AjPStr     tmp = NULL;
  AjPStr     tmpFileName = NULL;
  AjPSeqout  fil_file;
  AjPStr     line = NULL; /* if "AjPStr line; -> ajReadline is not success!" */
  AjPStr sizestr = NULL;
  ajint thissize;

  ajint      nb = 0;
  AjBool     are_prot = ajFalse;
  ajint      size = 0;
  AjPFile    infile;

  tmp = ajStrNewC("fasta");

  fil_file = ajSeqoutNew();
  tmpFileName = getUniqueFileName();

  if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) {
    embExitBad();
  }

  ajSeqoutSetFormatS(fil_file, tmp);

  while (ajSeqallNext(seqall, &seq)) {
    if (!nb) {
      are_prot  = ajSeqIsProt(seq);
    }
    ajSeqoutWriteSeq(fil_file, seq);
    ++nb;
  }
  ajSeqoutClose(fil_file);
  ajSeqoutDel(&fil_file);

  if (nb < 2) {
    ajFatal("Multiple alignments need at least two sequences");
  }

  infile = ajFileNewInNameS(tmpFileName);

  while (ajReadline(infile, &line)) {
    ajStrAppendS(&inseq,line);
    ajStrAppendC(&inseq,"\n");
  }

  soap_init(&soap);

  char* in0;
  in0 = ajCharNewS(inseq);
  if ( soap_call_ns1__runClustalw( &soap, NULL, NULL, in0, &params, &jobid ) == SOAP_OK ) {
    fprintf(stderr,"Jobid: %s\n",jobid);
  } else {
    soap_print_fault(&soap, stderr);
  }

  int check = 0;
  while ( check == 0 ) {
    if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid,  &check ) == SOAP_OK ) {
      fprintf(stderr,"*");
    } else {
      soap_print_fault(&soap, stderr);
    }
    sleep(3);
  }
  fprintf(stderr,"\n");

  char* type;

  type = "out";
  if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) {
    substr = ajStrNewC(result);
    fprintf(stdout, "%s\n", ajStrGetPtr(substr));
  } else {
    soap_print_fault(&soap, stderr);
  }

  type = "aln";
  if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) {
    substr = ajStrNewC(result);
    ajFmtPrintF(outf,"%S\n",substr);
  } else {
    soap_print_fault(&soap, stderr); 
  }

  type = "dnd";
  if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) {
    substr = ajStrNewC(result);
    ajFmtPrintF(outf_dnd,"%S\n",substr);
  } else {
    soap_print_fault(&soap, stderr); 
  }


  ajSysFileUnlinkS(tmpFileName);

  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

  ajFileClose(&outf_dnd);

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&substr);

  embExit();

  return 0;
}
コード例 #29
0
int main(ajint argc, char **argv)
{
    AjPList    pdb_path     =NULL;  /* Path of pdb files */
    AjPStr     pdb_name     =NULL;  /* Name of pdb file  */
    AjPDirout  ccf_path    =NULL;   /* Path of ccf files */
    AjPStr     ccf_name    =NULL;   /* Name of ccf file  */
    AjPStr     pdbid        =NULL;  /* PDB code          */
    AjPStr     pdbid_temp   =NULL;  /* PDB code          */
    
    AjBool     ccfnaming   =ajFalse;   
    /* True == use the pdbid code to name the output file, 
       False== use the name of the original pdb file*/
    
    /* Mask non-amino acid groups in protein chains that do not 
       contain a C-alpha atom. The group will not appear in either 
       the CO or SQ records of the clean coordinate file */
    AjBool     camask     =ajFalse;  
    
    
    /* Mask amino acids in protein chains that do not contain a 
       C-alpha atom. The amino acid will appear not appear in the 
       CO record but will still be present in the SQ record of the 
       clean coordinate file */
    AjBool     camask1    =ajFalse;   
    
    /* Mask residues or groups in protein chains with a single atom only */
    AjBool     atommask     =ajFalse;  
    
    AjPStr     temp         =NULL; /* Temp string */   
    AjPStr     msg          =NULL; /* Error message */
   AjPStr     base_name    =NULL; /* Name of pdb file w/o path or 
				      extension */
    
    
    AjPFile    pdb_inf      =NULL; /* pdb input file pointer */
    AjPFile    ccf_outf    =NULL; /* ccf output file pointer */
    AjPFile    logf         =NULL; /* log file pointer*/ 
    
    AjPPdb     pdb          =NULL; /* Pdb structure (for parsed data) */
    
    ajint      min_chain_size=0; /* Minimum length of a SEQRES chain 
				    for it to be parsed */
    ajint      max_mismatch=0; /* Max. no. residues to trim when checking
				  for missing C-terminal SEQRES residues. */
    ajint      max_trim=0;   /* Maximum number of permissible 
				  mismatches between the ATOM and 
				  SEQRES sequences */
    ajint      pos          =0; /* Location of the file extension in 
				   the pdb file name */
    
    
    
    
    /*     THIS_DIAGNOSTIC  
	   tempstr=ajStrNew();    
	   ajStrAssignC(&tempstr,     "diagnostics");
	   tempfile=ajFileNewOutNameS(tempstr);
	   ajStrDel(&tempstr);*/
    
    
    /* Initialise strings */
    ccf_name     = ajStrNew();
    pdb_name      = ajStrNew();
    temp          = ajStrNew();
    msg           = ajStrNew();
    base_name     = ajStrNew();
    pdbid         = ajStrNew();    
    pdbid_temp    = ajStrNew();    
    
    
    /* Read data from acd */
    embInitPV("pdbparse",argc,argv,"STRUCTURE",VERSION);

    pdb_path     = ajAcdGetDirlist("pdbpath");
    ccf_path     = ajAcdGetOutdir("ccfoutdir");
    logf         = ajAcdGetOutfile("logfile");
    min_chain_size=ajAcdGetInt("chnsiz");
    max_mismatch =ajAcdGetInt("maxmis");
    max_trim     =ajAcdGetInt("maxtrim");
    ccfnaming    = ajAcdGetBoolean("ccfnaming");
    camask       = ajAcdGetBoolean("camask");
    camask1      = ajAcdGetBoolean("camaska");
    atommask     = ajAcdGetBoolean("atommask");
    
    
    /* Check directories*/
    
    /*Start of main application loop*/
    while(ajListPop(pdb_path,(void **)&temp))
    {
	ajFmtPrint("Processing %S\n", temp);   
	ajFmtPrintF(logf, "%S\n", temp);    
	
	
		
	
	/* Read pdb file*/
	if((pdb_inf=ajFileNewInNameS(temp))==NULL)
	{
	    ajFmtPrintS(&msg, "Could not open for reading %S ", 
			temp);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", temp); 
	    ajStrDel(&temp);	
	    continue;	    
	}
	
	
	
	/* Assign pdb id code from file name */
	ajStrAssignS(&pdbid, temp);
	ajFilenameTrimPathExt(&pdbid);

	if(MAJSTRGETLEN(pdbid)>4)
	{
	    /* The file name is longer than expected (and probably contains a 
	       prefix). Take the last four characters to be the pdbid code */
	    ajStrAssignSubS(&pdbid_temp, pdbid, pos-4, pos-1);
	    ajStrAssignS(&pdbid, pdbid_temp);
	}
	else if(MAJSTRGETLEN(pdbid)<4)
	    ajFatal("Could not determine pdbid code from file name (%S)", pdbid);
	

	/* Parse pdb file and write pdb structure */
	if(!(pdb=ajPdbReadRawNew(pdb_inf, pdbid, min_chain_size, max_mismatch, 
				 max_trim, camask, camask1, atommask, logf)))
	{	
	    ajFmtPrintS(&msg, "Clean coordinate file not generated for %S", temp);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "NO_OUTPUT", temp); 
	    
	    ajFileClose(&pdb_inf);
	    ajStrDel(&temp);
	    continue;
	}
	
	
	/* Open clean coordinate file for writing*/
	if(ccfnaming)
	    ajStrAssignS(&ccf_name, pdb->Pdb);
	else
	    ajStrAssignS(&ccf_name, temp);
	ajStrFmtLower(&ccf_name);

	
	if(!(ccf_outf=ajFileNewOutNameDirS(ccf_name, ccf_path)))
	{
	    ajFmtPrintS(&msg, "Could not open %S for writing", 
			ccf_name);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", ccf_name); 
	    ajFileClose(&pdb_inf);
	    ajPdbDel(&pdb);
	    ajStrDel(&temp);	
	    continue;
	}  
	
	
	
	
	
	/* Write pdb file */
	if(!ajPdbWriteAll(ccf_outf, pdb))
	{
	    ajFmtPrintS(&msg, "Could not write file %S", ccf_name);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_WRITE", ccf_name); 
	    
	    ajFmtPrintS(&temp, "rm %S", ccf_name);
	    ajFmtPrint("%S", temp);
	    ajSysSystem(temp);

	    ajFileClose(&pdb_inf);
	    ajFileClose(&ccf_outf);
	    ajPdbDel(&pdb);
	    ajStrDel(&temp);	
	    continue;
	}
	
	
	
	/* Tidy up*/
	ajFileClose(&pdb_inf);
	ajFileClose(&ccf_outf);
	ajPdbDel(&pdb);   
	ajStrDel(&temp);	
	
	
	ajFmtPrintF(logf, "//\n");    
    }

    /*End of main application loop*/        





    /*Tidy up */
    ajListFree(&pdb_path);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccf_path);
    ajStrDel(&ccf_name);
    ajStrDel(&base_name);
    ajStrDel(&pdbid);
    ajStrDel(&pdbid_temp);
    ajStrDel(&msg);
    
    ajFileClose(&logf);
    
    
    
    /* DIAGNOSTIC
       ajFileClose(&tempfile);
       */
    
    /* Return */
    ajExit();
    return 0;
}	    
コード例 #30
0
/* @prog seqnr **************************************************************
**
** Removes redundancy from DHF files (domain hits files) or other files of 
** sequences.
**
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variable declarations */
    AjPList    in        = NULL;    /* Names of domain hits files (input).   */
    AjPStr     inname    = NULL;    /* Full name of the current DHF file.    */
    AjPFile    inf       = NULL;    /* Current DHF file.                     */
    EmbPHitlist infhits   = NULL;   /* Hitlist from DHF file                 */
    AjBool     dosing    = ajFalse; /* Filter using singlet sequences.       */
    AjPDir     singlets  = NULL;    /* Singlets (input).                     */
    AjBool     dosets    = ajFalse; /* Filter using sets of sequences.       */
    AjPDir     insets    = NULL;    /* Sets (input).                         */
    AjPStr     mode      = NULL;    /* Mode of operation                     */
    ajint      moden     = 0;       /* Mode 1: single threshold for redundancy
				       removal, 2: lower and upper thresholds
				       for redundancy removal.               */
    float      thresh    = 0.0;     /* Threshold for non-redundancy.         */
    float      threshlow = 0.0;	    /* Threshold (lower limit).              */
    float      threshup  = 0.0;	    /* Threshold (upper limit).              */
    AjPMatrixf matrix    = NULL;    /* Substitution matrix.                  */
    float      gapopen   = 0.0;     /* Gap insertion penalty.                */
    float      gapextend = 0.0;     /* Gap extension penalty.                */
    AjPDirout  out       = NULL;    /* Domain hits files (output).           */
    AjPFile    outf      = NULL;    /* Current DHF file (output).            */
    AjBool     dored     = ajFalse; /* True if redundant hits are output.    */
    AjPDirout  outred    = NULL;    /* DHF files for redundant hits (output).*/
    AjPFile    redf      = NULL;    /* Current DHF file redundancy (output). */
    AjPStr     outname   = NULL;    /* Name of output file (re-used).        */
    AjPFile    logf      = NULL;    /* Log file pointer.                     */
 
    AjBool     ok        = ajFalse; /* Housekeeping.                         */
    AjPSeqset  seqset    = NULL;    /* Seqset (re-used).                     */
    AjPSeqin   seqin     = NULL;    /* Seqin (re-used).                      */
    AjPList    seq_list  = NULL;    /* Main list for redundancy removal.     */
    EmbPDmxNrseq seq_tmp = NULL;    /* Temp. pointer for making seq_list.    */
    ajint      seq_siz   = 0;       /* Size of seq_list.                     */
    AjPUint    keep      = NULL;    /* 1: Sequence in seq_list was classed as
				       non-redundant, 0: redundant.          */
    AjPUint    nokeep    = NULL;    /* Inversion of keep array.              */
    ajint      nseqnr    = 0;       /* No. non-redundant seqs. in seq_list.  */
    

    AjPStr     filtername= NULL;    /* Name of filter file (re-used).        */
    AjPFile    filterf   = NULL;    /* Current filter file.                  */
    EmbPHitlist hitlist   = NULL;   /* Hitlist from input file (re-used).    */
    AjPScopalg scopalg   = NULL;    /* Scopalg from input file.              */
    ajint      x         = 0;       /* Housekeeping.                         */
    

    


    /* Read data from acd. */
    embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION);

    in        = ajAcdGetDirlist("dhfinpath");
    dosing    = ajAcdGetToggle("dosing");
    singlets    = ajAcdGetDirectory("singletsdir");
    dosets    = ajAcdGetToggle("dosets");
    insets    = ajAcdGetDirectory("insetsdir");
    mode      = ajAcdGetListSingle("mode");  
    thresh    = ajAcdGetFloat("thresh");
    threshlow = ajAcdGetFloat("threshlow");
    threshup  = ajAcdGetFloat("threshup");
    matrix    = ajAcdGetMatrixf("matrix");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    out       = ajAcdGetOutdir("dhfoutdir");
    dored     = ajAcdGetToggle("dored");
    outred    = ajAcdGetOutdir("redoutdir");
    logf      = ajAcdGetOutfile("logfile");



    /* Housekeeping. */
    filtername  = ajStrNew();
    outname     = ajStrNew();


    if(!(ajStrToInt(mode, &moden)))
	ajFatal("Could not parse ACD node option");


    
    /* Process each DHF (input) in turn. */
    while(ajListPop(in,(void **)&inname))
    {
	ajFmtPrint("Processing %S\n", inname);
	ajFmtPrintF(logf, "//\n%S\n", inname);


	seq_list    = ajListNew();
	keep        = ajUintNew();  	    
	nokeep      = ajUintNew();  	    	
	
	/**********************************/
	/*         Open DHF file          */
	/**********************************/
	if((inf = ajFileNewInNameS(inname)) == NULL)
	    ajFatal("Could not open DHF file %S", inname);

	/* Read DHF file. */
	ok = ajFalse;
	if(!(infhits = embHitlistReadFasta(inf)))
	{
	    ajWarn("embHitlistReadFasta call failed in seqnr");
	    ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n");
	
	    /* Read sequence set instead. */ 
	    seqset = ajSeqsetNew();
	    seqin  = ajSeqinNew();
	    ajSeqinUsa(&seqin, inname);
	
	    if(!(ajSeqsetRead(seqset, seqin)))
		ajFatal("SeqsetRead failed in seqsearch_psialigned");

	    if(ajSeqsetGetSize(seqset))
		ok = ajTrue;
	}
	else
	    if(infhits->N)
		ok = ajTrue;

	/* Close DHF file. */
	ajFileClose(&inf);
	
	/* Process empty DHF files (should never occur). */
	if(!ok)
	{		
	    ajWarn("Empty input file %S\n", inname);
	    ajFmtPrintF(logf, "Empty input file %S\n", inname);
	    if(infhits)
		embHitlistDel(&infhits);
	    if(seqset)
		ajSeqsetDel(&seqset);
	    if(seqin)
		ajSeqinDel(&seqin);
	    continue;
	}	

	
	/* 1.  Create list of sequences from the main input directory.. */
	if(infhits)
	{
	    for(x=0; x<infhits->N; x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc);
		ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq);
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	} 
	else
	{	 
	    for(x=0;x<ajSeqsetGetSize(seqset);x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x));
		ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x));
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	    ajSeqsetDel(&seqset);
	    ajSeqinDel(&seqin);
	}
	
    

	/**********************************/
	/*   Open singlets filter file    */
	/**********************************/
	if(dosing)
	{
	    /* Open singlets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(singlets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(singlets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DHF file %S",
		       filtername);
		ajFmtPrint("Could not open singlets filter file %S",
			   filtername);
	    }
	    else
	    {
		/* Read DHF file. */
		ok = ajFalse;
		if(!(hitlist = embHitlistReadFasta(filterf)))
		{
		    ajWarn("embHitlistReadFasta call failed in seqnr");
		    ajFmtPrintF(logf, 
				"embHitlistReadFasta call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
	
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(hitlist->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty singlets filter file %S\n", filtername);
		    ajFmtPrintF(logf, "Empty singlets filter file %S\n", 
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files). */
		if(hitlist)
		{
		    for(x=0; x<hitlist->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc);
			ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq);
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    embHitlistDel(&hitlist);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	
	/**********************************/
	/*      Open sets filter file     */
	/**********************************/
	if(dosets)
	{
	    /* Open sets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(insets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(insets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DAF file %S", filtername);
		ajFmtPrint("Could not open sets filter file %S", filtername);
	    }
	    else
	    {
		/* Read DAF file. */
		ok = ajFalse;

		if(!(ajDmxScopalgRead(filterf, &scopalg)))
		{
		    ajWarn("ajDmxScopalgRead call failed in seqnr");
		    ajFmtPrintF(logf,
				"ajDmxScopalgRead call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
		    
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(scopalg->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty sets filter file %S\n",
			   filtername);
		    ajFmtPrintF(logf, "Empty sets filter file %S\n",
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files).. */
		if(scopalg)
		{
		    for(x=0; x<scopalg->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]);
			ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]);
			/* Remove gap char's & whitespace. */
			ajStrRemoveGap(&seq_tmp->Seq->Seq);  
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajDmxScopalgDel(&scopalg);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	/* 4. Identify redundant domains.. */
	if(moden == 1)
	{
	    if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, thresh, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}		
	else
	{
	    if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, threshlow, threshup, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}	
	seq_siz = ajListGetLength(seq_list);
	for(x=0; x<seq_siz; x++)
	    if(ajUintGet(keep, x) == 1)
		ajUintPut(&nokeep, x, 0);
	    else
		ajUintPut(&nokeep, x, 1);
	

	/* Create output files. */
	ajStrAssignS(&outname, inname);
	ajFilenameTrimPathExt(&outname);
	outf = ajFileNewOutNameDirS(outname, out);
	if(dored)
	    redf = ajFileNewOutNameDirS(outname, outred);
	

	/* 5. Write non-redundant domains to main output directory.  
	   6.  If specified, write redundant domains to output directory. */
	embHitlistWriteSubsetFasta(outf, infhits, keep);
	if(dored)
	    embHitlistWriteSubsetFasta(redf, infhits, nokeep);

	embHitlistDel(&infhits);
	ajFileClose(&outf);
	ajFileClose(&redf);
	ajStrDel(&inname);

	while(ajListPop(seq_list, (void **) &seq_tmp))
	{
	    ajSeqDel(&seq_tmp->Seq);
	    AJFREE(seq_tmp);
	}
	ajListFree(&seq_list);
	ajUintDel(&keep);	
	ajUintDel(&nokeep);
    }	    


    /* Tidy up. */
    ajListFree(&in);
    if(singlets)
	ajDirDel(&singlets);
    if(insets)
	ajDirDel(&insets);
    ajDiroutDel(&out);
    if(outred)
	ajDiroutDel(&outred);
    ajFileClose(&logf);

    ajMatrixfDel(&matrix);

    ajStrDel(&filtername);
    ajStrDel(&outname);
    ajStrDel(&mode);


    embExit();
    return 0;
}