int main(int argc, char **argv) { /* ACD data item variables */ AjPFile hmmfile = NULL; AjPSeqset seqfile = NULL; AjPFile mapali = NULL; AjPFile withali = NULL; AjPAlign o = NULL; AjBool m = ajFalse; AjBool q = ajFalse; /* Housekeeping variables */ AjPStr cmd = NULL; AjPStr fmt = NULL; AjBool fmtok = ajFalse; AjPStr rnd = NULL; AjPSeqout rndo = NULL; /* ACD file processing */ embInitPV("ehmmalign",argc,argv,"HMMERNEW",VERSION); hmmfile = ajAcdGetInfile("hmmfile"); seqfile = ajAcdGetSeqset("seqfile"); mapali = ajAcdGetInfile("mapali"); withali = ajAcdGetInfile("withali"); o = ajAcdGetAlign("o"); m = ajAcdGetBoolean("m"); q = ajAcdGetBoolean("q"); /* MAIN APPLICATION CODE */ /* 1. Housekeeping */ cmd = ajStrNew(); fmt = ajStrNew(); rnd = ajStrNew(); /* 2. Re-write seqfile to a temporary file in a format (fasta) HMMER can understand. We cannot just pass the name of seqfile to HMMER as the name provided might be a USA which HMMER would not understand. */ ajFilenameSetTempname(&rnd); rndo = ajSeqoutNew(); if(!ajSeqoutOpenFilename(rndo, rnd)) ajFatal("Terminal ajSeqFileNewOut failure. Email EMBOSS helpdesk!\n"); ajSeqoutSetFormatC(rndo, "fasta"); ajSeqoutWriteSet(rndo, seqfile); ajSeqoutClose(rndo); ajSeqoutDel(&rndo); /* 3. Build hmmalign command line */ /* Command line is built in this order: i. Application name. ii. HMMER 'options' (in order they appear in ACD file) iii.HMMER 'options' (that don't appear in ACD file) iv. HMMER & new parameters. */ ajFmtPrintS(&cmd, "%S ", ajAcdGetpathC("hmmalign")); if(mapali) ajFmtPrintAppS(&cmd, " --mapali %s ", ajFileGetNameC(mapali)); if(withali) ajFmtPrintAppS(&cmd, " --withali %s ", ajFileGetNameC(withali)); if(m) ajStrAppendC(&cmd, " -m "); if(q) ajStrAppendC(&cmd, " -q "); /* Ensure output alignment is in user-specified format. */ fmtok=ajTrue; ajStrAssignS(&fmt, ajAlignGetFormat(o)); /* fasta and a2m are identical formats. */ if(ajStrMatchC(fmt, "fasta")) ajStrAssignC(&fmt, "A2M"); else if(ajStrMatchC(fmt, "a2m")) ajStrAssignC(&fmt, "A2M"); else if(ajStrMatchC(fmt, "msf")) ajStrAssignC(&fmt, "MSF"); else if(ajStrMatchC(fmt, "phylip")) ajStrAssignC(&fmt, "PHYLIP"); /* hmmer also supports stockholm, SELEX & Clustal output, EMBOSS does not. EMBOSS supports unknown/multiple/simple and srs output, hmmer does not. */ else fmtok = ajFalse; if(!fmtok) { /* This could be replaced with code to reformat the file. */ ajWarn("Specified output alignment format ('o' ACD option) is " "not understood by HMMER. Using stockholm format instead."); ajStrAssignC(&fmt, "Stockholm"); } /* rnd is the name of the rewritten seqfile. MUST specify FASTA format explicitly. */ ajFmtPrintAppS(&cmd, " --informat FASTA --outformat %S -o %s %s %S", fmt, ajAlignGetFilename(o), ajFileGetNameC(hmmfile), rnd); /* 4. Close ACD files */ ajFileClose(&hmmfile); ajSeqsetDel(&seqfile); ajFileClose(&mapali); ajFileClose(&withali); ajAlignClose(o); ajAlignDel(&o); /* 5. Call hmmalign */ ajFmtPrint("\n%S\n\n", cmd); ajSysExecS(cmd); /* 6. Exit cleanly */ ajSysFileUnlinkS(rnd); ajStrDel(&cmd); ajStrDel(&fmt); ajStrDel(&rnd); embExit(); return 0; }
int main(int argc, char **argv) { AjPStr cl = NULL; AjPStr stmp = NULL; AjPStr squal = NULL; /* AjBool bqual = ajFalse; */ AjPTable preftab = NULL; embInitPV("emira", argc, argv, "MIRA",VERSION); cl = ajStrNewS(ajAcdGetpathC("mira")); stmp = ajStrNew(); preftab = emira_makepreftab(); if(ajAcdIsUserdefinedC("genome")) { squal = ajAcdGetListSingle("genome"); ajFmtPrintAppS(&cl," -genome%S",squal); ajStrDel(&squal); } if(ajAcdIsUserdefinedC("mapping")) { squal = ajAcdGetListSingle("mapping"); ajFmtPrintAppS(&cl," -mapping%S",squal); ajStrDel(&squal); } if(ajAcdIsUserdefinedC("clipping")) { squal = ajAcdGetListSingle("clipping"); ajFmtPrintAppS(&cl," -clipping%S",squal); ajStrDel(&squal); } if(ajAcdIsUserdefinedC("setparam")) { squal = ajAcdGetListSingle("setparam"); if(!ajStrMatchC(squal,"unspecified")) ajFmtPrintAppS(&cl," -%S",squal); ajStrDel(&squal); } emira_dostrings(&cl, preftab); emira_doinfiles(&cl, preftab); emira_dodirectories(&cl, preftab); emira_dobools(&cl, preftab); emira_dointegers(&cl, preftab); emira_dolistsingles(&cl, preftab); ajDebug("Constructed command line: %S\n",cl); #if 0 ajFmtPrint("%S\n",cl); #endif #if 1 system(ajStrGetPtr(cl)); #endif ajStrDel(&stmp); ajStrDel(&cl); ajTablestrFree(&preftab); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset = NULL; AjPStr cl = NULL; AjPSeqout seqout = NULL; AjBool pc = ajFalse; AjBool sigp = ajFalse; AjBool plot = ajFalse; const AjPStr ofn = NULL; AjPStr fn = NULL; AjPStr stmp = NULL; AjPFile outf = NULL; embInitPV("eprop", argc, argv, "CBSTOOLS", VERSION); seqset = ajAcdGetSeqset("sequence"); outf = ajAcdGetOutfile("outfile"); plot = ajAcdGetBoolean("plot"); pc = ajAcdGetBoolean("pcprediction"); sigp = ajAcdGetBoolean("signalp"); cl = ajStrNewC("prop "); fn = ajStrNew(); stmp = ajStrNew(); ajFilenameSetTempname(&fn); seqout = ajSeqoutNew(); if(!ajSeqoutOpenFilename(seqout, fn)) ajFatal("Cannot open temporary file %S",fn); ajSeqoutSetFormatC(seqout, "fasta"); ajSeqoutWriteSet(seqout,seqset); ajSeqoutClose(seqout); if(plot) ajStrAppendC(&cl," -g"); if(pc) ajStrAppendC(&cl," -p"); if(sigp) ajStrAppendC(&cl," -s"); ajFmtPrintS(&stmp," %S",fn); ajStrAppendS(&cl,stmp); ofn = ajFileGetNameS(outf); ajFmtPrintS(&stmp," > %S",ofn); ajStrAppendS(&cl,stmp); ajFileClose(&outf); #if 0 ajFmtPrint("%S\n",cl); #endif #if 1 if(system(ajStrGetPtr(cl)) == -1) ajFatal("Command %S failed",cl); #endif ajSysFileUnlink(fn); ajStrDel(&cl); ajStrDel(&fn); ajStrDel(&stmp); ajSeqoutDel(&seqout); ajSeqsetDel(&seqset); embExit(); return 0; }
/* @funcstatic domainalign_stamp ********************************************** ** ** Call STAMP and process files. ** ** @param [r] prevdomain [AjPDomain] Previous domain. ** @param [r] domain [AjPDomain] This domain. ** @param [r] daf [AjPDirout] Domain alignment files. ** @param [r] super [AjPDirout] Superimposition files. ** @param [r] singlets [AjPDirout] Singlet files. ** @param [r] align [AjPStr] Align. ** @param [r] alignc [AjPStr] Alignc. ** @param [r] dom [AjPStr] Dom. ** @param [r] name [AjPStr] Name. ** @param [r] set [AjPStr] Name of set file. ** @param [r] scan [AjPStr] Name of scan file. ** @param [r] sort [AjPStr] Name of sort file. ** @param [r] log [AjPStr] Lof file name. ** @param [r] out [AjPStr] Out file name. ** @param [r] keepsinglets [AjBool] Keep singlet sequences or not. ** @param [r] moden [ajint] Mode number. ** @param [r] noden [ajint] Node number. ** @param [r] nset [ajint] Number in set. ** @param [r] logf [AjPFile] Lof file. ** ** ** @return [void] True on success ** @@ ****************************************************************************/ static void domainalign_stamp(AjPDomain prevdomain, AjPDomain domain, AjPDirout daf, AjPDirout super, AjPDirout singlets, AjPStr align, AjPStr alignc, AjPStr dom, AjPStr name, AjPStr set, AjPStr scan, AjPStr sort, AjPStr log, AjPStr out, AjBool keepsinglets, ajint moden, ajint noden, ajint nset, AjPFile logf) { AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPFile clusterf = NULL; /* File pointer for log file. */ ajint ncluster = 0; /* Counter for the number of clusters. */ AjPStr line = NULL; /* Holds a line from the log file. */ AjPRegexp rexp = NULL; /* For parsing no. of clusters in log file */ AjPStr temp = NULL; /* A temporary string. */ ajint x = 0; /* Loop counter. */ exec = ajStrNew(); line = ajStrNew(); temp = ajStrNew(); rexp = ajRegCompC("^(Cluster:)"); ajDebug("domainalign_stamp name: '%S'\n", name); /* Call STAMP. */ ajFmtPrintS(&exec, "stamp -l %S -s -n 2 -slide 5 -prefix %S -d %S", dom, name, set); ajFmtPrint("\n%S\n\n", exec); ajSysSystem(exec); ajFmtPrintS(&exec, "sorttrans -f %S -s Sc 2.5", scan); ajFmtPrint("\n%S > %S\n\n", exec, sort); ajSysSystemOut(exec, sort); ajFmtPrintS(&exec, "stamp -l %S -prefix %S", sort, name); ajFmtPrint("\n%S > %S\n\n", exec, log); ajSysSystemOut(exec, log); ajFmtPrintS(&exec, "transform -f %S -g -o %S", sort, alignc); ajFmtPrint("\n%S\n\n", exec); ajSysSystem(exec); /* Count the number of clusters in the log file. */ if(!(clusterf=ajFileNewInNameS(log))) ajFatal("Could not open log file '%S'\n", log); ncluster=0; while(ajReadlineTrim(clusterf,&line)) if(ajRegExec(rexp,line)) ncluster++; ajFileClose(&clusterf); ajDebug("ncluster: %d\n", ncluster); /* Call STAMP ... calculate two fields for structural equivalence using threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */ ajFmtPrintS(&exec,"poststamp -f %S.%d -min 0.5", name, ncluster); ajFmtPrint("%S\n\n", exec); ajSysSystem(exec); /* Call STAMP ... convert block format alignment into clustal format. */ ajFmtPrintS(&exec,"ver2hor -f %S.%d.post", name, ncluster); ajFmtPrint("%S > %S\n\n", exec, out); ajSysSystemOut(exec, out); /* Process STAMP alignment file and generate alignment file for output. */ domainalign_ProcessStampFile(out, align, prevdomain, noden, logf); /* Remove all temporary files. */ for(x=1;x<ncluster+1;x++) { ajFmtPrintS(&temp, "%S.%d", name, x); ajSysFileUnlink(temp); } ajFmtPrintS(&temp, "%S.%d.post", name, ncluster); ajSysFileUnlink(temp); ajStrDel(&exec); ajStrDel(&line); ajStrDel(&temp); ajRegFree(&rexp); return; }
int main(int argc, char **argv) { ajint famn = 0; /* Counter for the families. */ ajint nset = 0; /* No. entries in family. */ ajint last_nodeid = 0; /* SCOP Sunid of last family that was processed. */ AjPStr last_node = NULL; /* Last family that was processed. */ AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPStr out = NULL; /* Name of stamp alignment file. */ AjPStr align = NULL; /* Name of sequence alignment file. */ AjPStr alignc = NULL; /* Name of structure alignment file. */ AjPStr log = NULL; /* Name of STAMP log file. */ AjPStr dom = NULL; /* Name of file containing single domain. */ AjPStr set = NULL; /* Name of file containing set of domains. */ AjPStr scan = NULL; /* Name of temp. file used by STAMP. */ AjPStr sort = NULL; /* Name of temp. file used by STAMP. */ AjPStr name = NULL; /* Base name of STAMP temp files. */ AjPStr pdbnames = NULL; /* Names of domain pdb files to be passed to TCOFFEEE. */ AjPDir pdb = NULL; /* Path of domain coordinate files (pdb format input). */ AjPDirout daf = NULL; /* Path of sequence alignment files for output. */ AjPDirout super = NULL; /* Path of structure alignment files for output. */ AjPDirout singlets = NULL; /* Path of FASTA singlet sequence files for output. */ AjPStr temp1 = NULL; /* A temporary string. */ AjPFile dcfin = NULL; /* File pointer for original Escop.dat file.*/ AjPFile domf = NULL; /* File pointer for single domain file. */ AjPFile setf = NULL; /* File pointer for domain set file. */ AjPFile logf = NULL; /* Log file. */ AjPDomain domain = NULL; /* Pointer to domain structure. */ AjPDomain prevdomain = NULL; /* Pointer to previous domain structure. */ ajint type = 0; /* Type of domain (ajSCOP or ajCATH) in the DCF file. */ AjPStr *node = NULL; /* Node of alignment . */ ajint noden = 0; /*1: Class (SCOP), 2: Fold (SCOP) etc, see ACD file. */ AjPStr *mode = NULL; /* Mode of operation from acd*/ ajint moden = 0; /* Program mode, 1: MODE_STAMP, 2: MODE_TCOFFEE (not yet implemented). */ AjBool keepsinglets= ajFalse; /*Whether to retain sequences of singlet families and write them to an output file. */ AjPStr temp = NULL; /* A temporary string. */ AjPStr cmd = NULL; /* The command line to execute t-coffee. */ /* Initialise strings etc*/ last_node = ajStrNew(); exec = ajStrNew(); out = ajStrNew(); align = ajStrNew(); alignc = ajStrNew(); log = ajStrNew(); dom = ajStrNew(); set = ajStrNew(); scan = ajStrNew(); sort = ajStrNew(); name = ajStrNew(); temp = ajStrNew(); temp1 = ajStrNew(); cmd = ajStrNew(); pdbnames = ajStrNew(); /* Read data from acd. */ embInitPV("domainalign",argc,argv,"DOMALIGN",VERSION); dcfin = ajAcdGetInfile("dcfinfile"); pdb = ajAcdGetDirectory("pdbdir"); daf = ajAcdGetOutdir("dafoutdir"); super = ajAcdGetOutdir("superoutdir"); singlets = ajAcdGetOutdir("singletsoutdir"); node = ajAcdGetList("node"); mode = ajAcdGetList("mode"); keepsinglets = ajAcdGetToggle("keepsinglets"); logf = ajAcdGetOutfile("logfile"); /* Convert the selected node and mode to an integer. */ if(!(ajStrToInt(node[0], &noden))) ajFatal("Could not parse ACD node option"); if(!(ajStrToInt(mode[0], &moden))) ajFatal("Could not parse ACD node option"); /* Initialise random number generator for naming of temp. files. */ ajRandomSeed(); ajFilenameSetTempname(&name); /* Create names for temp. files. */ ajStrAssignS(&log, name); ajStrAppendC(&log, ".log"); ajStrAssignS(&dom, name); ajStrAppendC(&dom, ".dom"); ajStrAssignS(&set, name); ajStrAppendC(&set, ".set"); ajStrAssignS(&scan, name); ajStrAppendC(&scan, ".scan"); ajStrAssignS(&sort, name); ajStrAppendC(&sort, ".sort"); ajStrAssignS(&out, name); ajStrAppendC(&out, ".out"); /* Initialise last_node with something that is not in SCOP. */ ajStrAssignC(&last_node,"!!!!!"); /* Open STAMP domain set file. */ if(moden == MODE_STAMP) { if(!(setf=ajFileNewOutNameS(set))) ajFatal("Could not open domain set file\n"); } /* Get domain type. */ type = ajDomainDCFType(dcfin); /* Start of main application loop. */ while((domain=(ajDomainReadCNew(dcfin, "*", type)))) { /* A new family. */ if(((domain->Type == ajSCOP) && (((noden==1) && (last_nodeid != domain->Scop->Sunid_Class)) || ((noden==2) && (last_nodeid != domain->Scop->Sunid_Fold)) || ((noden==3) && (last_nodeid != domain->Scop->Sunid_Superfamily))|| ((noden==4) && (last_nodeid != domain->Scop->Sunid_Family)))) || ((domain->Type == ajCATH) && (((noden==5) && (last_nodeid != domain->Cath->Class_Id)) || ((noden==6) && (last_nodeid != domain->Cath->Arch_Id)) || ((noden==7) && (last_nodeid != domain->Cath->Topology_Id)) || ((noden==8) && (last_nodeid != domain->Cath->Superfamily_Id)) || ((noden==9) && (last_nodeid != domain->Cath->Family_Id))))) { /* If we have done the first family. */ if(famn) { /* Create the output file for the alignment - the name will be the same as the Sunid for the DOMAIN family. */ domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc); if(moden == MODE_STAMP) { /* Close domain set file. */ ajFileClose(&setf); /* Call STAMP. */ /* Family with 2 or more entries. */ if(nset > 1) { domainalign_stamp(prevdomain, domain, daf, super, singlets, align, alignc, dom, name, set, scan, sort, log, out, keepsinglets, moden, noden, nset, logf); } else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); /* Open STAMP domain set file. */ if(!(setf=ajFileNewOutNameS(set))) ajFatal("Could not open domain set file\n"); } else { /* Call TCOFEE. */ if(nset > 1) domainalign_tcoffee(prevdomain, out, align, alignc, pdbnames, noden, logf); else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } /* Set the number of members of the new family to zero. */ nset = 0; /* Clear TCOFFEE argument. */ ajStrSetClear(&pdbnames); } /* Open, write and close STAMP domain file. */ if(moden == MODE_STAMP) { if(!(domf=ajFileNewOutNameS(dom))) ajFatal("Could not open domain file\n"); ajStrAssignS(&temp, ajDomainGetId(domain)); ajStrFmtLower(&temp); ajFmtPrintF(domf, "%S %S { ALL }\n", temp, temp); ajFileClose(&domf); } /* Copy current family name to last_node. */ domainalign_writelast(domain, noden, &last_node, &last_nodeid); /* Copy current domain pointer to prevdomain. */ ajDomainDel(&prevdomain); prevdomain=NULL; ajDomainCopy(&prevdomain, domain); /* Increment family counter. */ famn++; } ajStrAssignS(&temp, ajDomainGetId(domain)); ajStrFmtLower(&temp); /* Write STAMP domain set file. */ if(moden == MODE_STAMP) ajFmtPrintF(setf, "%S %S { ALL }\n", temp, temp); /* Write TCOFFEE argument. */ else { ajStrAppendS(&pdbnames, ajDirGetPath(pdb)); ajStrAppendS(&pdbnames, temp); ajStrAppendC(&pdbnames, "."); ajStrAppendS(&pdbnames, ajDirGetExt(pdb)); ajStrAppendC(&pdbnames, " "); } ajDomainDel(&domain); /* Increment number of members in family. */ nset++; } /* End of main application loop. */ domain=prevdomain; ajFmtPrint("\nProcessing node %d\n", last_nodeid); /* Create the output file for the alignment - the name will be the same as the Sunid for the DOMAIN family. */ domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc); /* Code to process last family. */ if(moden == MODE_STAMP) { /*Close domain set file. */ ajFileClose(&setf); /* ajFmtPrint("\n***** SECOND CALL\n");. */ if(nset > 1) { domainalign_stamp(prevdomain, domain, daf, super, singlets, align, alignc, dom, name, set, scan, sort, log, out, keepsinglets, moden, noden, nset, logf); } else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } else { /* Call TCOFEE. */ if(nset > 1) domainalign_tcoffee(prevdomain, out, align, alignc, pdbnames, noden, logf); else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } /* Remove all temporary files. */ ajSysFileUnlink(log); ajSysFileUnlink(dom); ajSysFileUnlink(set); ajSysFileUnlink(scan); ajSysFileUnlink(sort); ajSysFileUnlink(out); ajStrAssignS(&temp, name); ajStrAppendC(&temp, ".mat"); ajSysFileUnlink(temp); /* Tidy up*/ ajDomainDel(&domain); ajFileClose(&dcfin); ajStrDel(&last_node); ajStrDel(&exec); ajStrDel(&log); ajStrDel(&dom); ajStrDel(&set); ajStrDel(&scan); ajStrDel(&sort); ajStrDel(&name); ajStrDel(&out); ajStrDel(&align); ajStrDel(&alignc); ajStrDel(&pdbnames); ajDirDel(&pdb); ajDiroutDel(&daf); ajDiroutDel(&super); ajDiroutDel(&singlets); ajStrDel(&temp); ajStrDel(&temp1); ajStrDel(&node[0]); AJFREE(node); ajStrDel(&mode[0]); AJFREE(mode); ajFileClose(&logf); ajExit(); return 0; }
int main(int argc, char *argv[]) { embInitPV("greporiter", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjBool accid = ajFalse; AjPStr restid = NULL; AjPStr seqid = NULL; AjPStr base = NULL; AjPStr url = NULL; AjBool oriloc = 0; AjBool gcskew = 0; AjBool dbonly = 0; ajint difthreshold = 0; AjPFile outf = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr fstname = NULL; AjPFilebuff tmp = NULL; AjPStr line = NULL; AjPSeqout tmpout = NULL; AjPRegexp regex; AjPStr ori = NULL; AjPStr ter = NULL; seqall = ajAcdGetSeqall("sequence"); difthreshold = ajAcdGetInt("difthreshold"); oriloc = ajAcdGetBoolean("oriloc"); gcskew = ajAcdGetBoolean("gcskew"); dbonly = ajAcdGetBoolean("dbonly"); accid = ajAcdGetBoolean("accid"); outf = ajAcdGetOutfile("outfile"); base = ajStrNewC("rest.g-language.org"); gAssignUniqueName(&tmpname); gAssignUniqueName(&fstname); ajStrAppendC(&fstname, ".fasta"); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); tmpout = ajSeqoutNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { if(!ajSeqoutOpenFilename(tmpout, fstname)) { embExitBad(); } ajSeqoutSetFormatS(tmpout,ajStrNewC("fasta")); ajSeqoutWriteSeq(tmpout, seq); ajSeqoutClose(tmpout); ajSeqoutDel(&tmpout); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, fstname, &restid); ajStrDel(&url); ajSysFileUnlinkS(fstname); } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); ajFmtPrintS(&url, "http://%S/%S/rep_ori_ter/oriloc=%d/gcskew=%d/" "difthreshold=%d/dbonly=%d/", base, restid, oriloc, gcskew, difthreshold, dbonly); if(!gFilebuffURLS(url, &tmp)) { ajDie("Failed to download result from:\n%S\n", url); } ajBuffreadLine(tmp, &line); regex = ajRegCompC("([0-9]+),([0-9]+)"); if(ajRegExec(regex, line)) { if(ajRegSubI(regex, 1, &ori), ajRegSubI(regex, 2, &ter)) { ajFmtPrint("%S Origin: %S Terminus %S\n", seqid, ori, ter); } } ajStrDel(&url); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&base); embExit(); return 0; }
int main(ajint argc, char **argv) { AjPList ccfin = NULL; /* List of CCF (input) files. */ AjPDir pdbin = NULL; /* Path of pdb input files. */ AjPStr pdbprefix = NULL; /* Prefix of pdb input files. */ AjPStr pdb_name = NULL; /* Full name (path/name/extension) of pdb format input file. */ AjPDirout ccfout = NULL; /* Path of coordinate output file. */ AjPStr randomname = NULL; /* Name for temp file tempf. */ AjPStr ccf_this = NULL; AjPStr exec = NULL; AjPStr naccess_str = NULL; AjPStr line = NULL; AjPStr syscmd = NULL; /* Command line arguments. */ AjPStr *mode = NULL; /* Mode of operation from acd. */ AjPFile errf = NULL; /* pdbplus error file pointer. */ AjPFile serrf = NULL; /* stride error file pointer. */ AjPFile nerrf = NULL; /* stride error file pointer. */ AjPFile tempf = NULL; /* Temp file for holding STRIDE output. */ AjPFile ccf_inf = NULL; /* Protein coordinate input file. */ AjPFile ccf_outf = NULL; /* Protein coordinate output file. */ AjIList iter = NULL; AjBool done_naccess= ajFalse; AjBool done_stride = ajFalse; AjBool found = ajFalse; AjPResidue temp_res = NULL; /* Pointer to Residue object. */ AjPPdb pdb_old = NULL; /* Pointer to PDB object - without new stride elements. */ AjPPdb pdb = NULL; /* Pointer to PDB object. */ ajint idn = 0; /* Chain identifier as a number (1,2,...) */ ajint chain_num = 0; /* Chain identifier index (0,1,...). */ ajint tS = 0; /* User-defined threshold size for SSEs. */ ajint nostride = 0; /* No. times stride failed */ ajint nonaccess = 0; /* No. times naccess failed */ ajint nofile = 0; /* No. times of file error */ /* Variables for each item that will be parsed from the ASG line. */ AjPStr res = NULL; /* Residue id from STRIDE ASG line (ALA etc). */ AjPStr res_num = NULL; /* PDB residue number from STRIDE ASG line. */ char pcid = ' '; /* Protein chain identifier from STRIDE or NACESS output (A,B, etc). */ char ss = ' '; /* One-letter secondary structure code from STRIDE ASG line. */ float ph = 0.0; /* Phi angle from STRIDE ASG line. */ float ps = 0.0; /* Psi angle from STRIDE ASG line. */ float sa = 0.0; /* Residue solvent accessible area from STRIDE ASG line. */ float f1 = 0; float f2 = 0; float f3 = 0; float f4 = 0; float f5 = 0; float f6 = 0; float f7 = 0; float f8 = 0; float f9 = 0; float f10 = 0; /* Allocate strings; this section is used for variables that are allocated once only. */ pdb_name = ajStrNew(); res = ajStrNew(); res_num = ajStrNew(); randomname = ajStrNew(); syscmd = ajStrNew(); line = ajStrNew(); naccess_str = ajStrNew(); exec = ajStrNew(); /* Read data from acd. */ embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION); ccfin = ajAcdGetDirlist("ccfinpath"); pdbin = ajAcdGetDirectory("pdbindir"); pdbprefix = ajAcdGetString("pdbprefix"); ccfout = ajAcdGetOutdir("ccfoutdir"); mode = ajAcdGetList("mode"); errf = ajAcdGetOutfile("logfile"); if(ajStrGetCharFirst(*mode) != '2') serrf = ajAcdGetOutfile("slogfile"); if(ajStrGetCharFirst(*mode) != '1') nerrf = ajAcdGetOutfile("nlogfile"); tS = ajAcdGetInt("thresholdsize"); ajRandomSeed(); ajFilenameSetTempname(&randomname); /* ** Start of main application loop. ** Process each PDB/ protein coordinate file (EMBL format) in turn. */ while(ajListPop(ccfin,(void **)&ccf_this)) { /* Open protein coordinate file. If it cannot be opened, write a message to the error file, delete ccf_this and continue. */ if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL) { ajWarn("%s%S\n//\n", "clean coordinate file not found: ", ccf_this); ajFmtPrintF(errf, "%s%S\n//\n", "clean coordinate file not found: ", ccf_this); ajStrDel(&ccf_this); nofile++; continue; } ajFmtPrint("Processing %S\n", ccf_this); fflush(stdout); /* Parse protein coordinate data (from clean format file) into AjPPdb object. ajPdbReadAllModelsNew will create the AjPPdb object. */ if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf))) { ajWarn("ERROR Clean coordinate file read" "error: %S\n//\n", ccf_this); ajFmtPrintF(errf, "ERROR Clean coordinate file read" "error: %S\n//\n", ccf_this); ajFileClose(&ccf_inf); ajStrDel(&ccf_this); nofile++; continue; } ajFileClose(&ccf_inf); ajPdbCopy(&pdb, pdb_old); ajPdbDel(&pdb_old); /* Construct name of corresponding PDB file. NACCESS does *not* generate an output file if the path is './' e.g. naccess ./1rbp.ent , therefore replace './' with null. */ ajStrAssignS(&pdb_name, ajDirGetPath(pdbin)); if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, ".")) ajStrAssignC(&pdb_name, ""); ajStrAppendS(&pdb_name, pdbprefix); ajStrFmtLower(&pdb->Pdb); ajStrAppendS(&pdb_name, pdb->Pdb); ajStrAppendC(&pdb_name, "."); ajStrAppendS(&pdb_name, ajDirGetExt(pdbin)); /* Check corresponding PDB file exists for reading using ajFileStat. */ if(!(ajFilenameExistsRead(pdb_name))) { ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ", pdb_name); ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name); ajStrDel(&ccf_this); ajPdbDel(&pdb); nofile++; continue; } if(ajStrGetCharFirst(*mode) != '2') { /* ** Create a string containing the STRIDE command line (it needs ** PDB file name & name of temp output file). ** Call STRIDE by using ajSystem. */ ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1", ajAcdGetpathC("stride"), pdb_name, randomname, ajFileGetNameC(serrf)); ajFmtPrint("%S %S -f%S >> %s 2>&1\n", ajAcdGetpathC("stride"), pdb_name, randomname,ajFileGetNameC(serrf)); system(ajStrGetPtr(syscmd)); /* Open the stride output file */ if (((tempf = ajFileNewInNameS(randomname)) == NULL)) { ajWarn("%s%S\n//\n", "no stride output for: ", pdb_name); ajFmtPrintF(errf, "%s%S\n//\n", "no stride output for: ", pdb_name); nostride++; ajStrDel(&ccf_this); ajPdbDel(&pdb); continue; } else ajFmtPrintF(errf, "%s%S\n//\n", "stride output for: ", pdb_name); done_stride = ajFalse; /* Parse STRIDE output from temp output file a line at a time. */ while(ajReadlineTrim(tempf,&line)) { if(ajStrPrefixC(line,"ASG")) { ajFmtScanS(line, "%*S %S %c %S %*d %c %*S %f %f %f %*S", &res, &pcid, &res_num, &ss, &ph, &ps, &sa); /* ** Populate pdbplus object with the data from this parsed ** line. This means first identifying the chain, then ** finding the residue. */ /* Determine the chain number. ajDmxPdbplusChain does not recognise '-', so change '-' to '.' */ if (pcid == '-') pcid = '.'; /* Get chain number from the chain identifier. */ if(!ajPdbChnidToNum(pcid, pdb, &idn)) { ajWarn("Could not convert chain id %c to chain" " number in pdb file %S\n//\n", pcid, pdb_name); ajFmtPrintF(errf, "Could not convert chain id %c " "to chain number in pdb file %S\n//\n", pcid, pdb_name); continue; } /* ** The chain number that will get written starts at 1, but ** we want an index into an array which must start at 0, ** so subtract 1 from the chain number to get the index. */ chain_num = idn-1; /* ** Iiterate through the list of residues in the Pdb object, ** found switches to true when first residue corresponding ** to the line is found. */ /* iter = ajListIterNewread(pdb->Chains[chain_num]->Atoms); */ iter = ajListIterNewread(pdb->Chains[chain_num]->Residues); found = ajFalse; while((temp_res = (AjPResidue)ajListIterGet(iter))) { /* If we have found the residue we want */ if((ajStrMatchS(res_num, temp_res->Pdb) && ajStrMatchS(res, temp_res->Id3))) { done_stride = ajTrue; found = ajTrue; temp_res->eStrideType = ss; temp_res->Phi = ph; temp_res->Psi = ps; temp_res->Area = sa; } /* If the matching residue has been processed move on to next ASG line, next residue. */ else if(found == ajTrue) break; else /* Matching residue not found yet. */ continue; } ajListIterDel(&iter); } /* End of if ASG loop. */ } /* End of while line loop. */ if(done_stride) ajFmtPrintF(errf, "%s%S\n//\n", "stride data for: ", pdb_name); else { ajFmtPrintF(errf, "%s%S\n//\n", "no stride data for: ", pdb_name); ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name); nostride++; } /* Close STRIDE temp file. & tidy up. */ ajFileClose(&tempf); /* Remove temporary file (stride output file). */ ajFmtPrintS(&exec, "rm %S", randomname); ajSysSystem(exec); /* ** Calculate element serial numbers (eStrideNum)& ammend residue ** objects, count no's of elements and ammend chain object ** (numHelices, num Strands). */ pdbplus_sort(pdb, tS); } if(ajStrGetCharFirst(*mode) != '1') { /* ** Create a string containing the NACCESS command line (it needs ** PDB file name & name of temp output file) & call NACCESS. ** If e.g. /data/structure/pdbfred.ent was parsed and the program ** was run from /stuff, then /stuff/fred.asa and /stuff/fred.rsa ** would be written. These must be deleted once parsed (only ** use the .rsa file here). */ ajFmtPrintS(&syscmd, "%S %S >> %s 2>&1", ajAcdGetpathC("naccess"), pdb_name, ajFileGetNameC(nerrf)); ajFmtPrint("%S %S >> %s 2>&1\n", ajAcdGetpathC("naccess"), pdb_name, ajFileGetNameC(nerrf)); system(ajStrGetPtr(syscmd)); ajStrAssignS(&naccess_str, pdbprefix); ajStrAppendS(&naccess_str, pdb->Pdb); ajStrAppendC(&naccess_str, ".rsa"); /* Open the NACCESS output file. */ if (((tempf = ajFileNewInNameS(naccess_str)) == NULL)) { ajFmtPrintF(errf, "%s%S\n//\n", "no naccess output for: ", pdb_name); ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name); nonaccess++; ajStrDel(&ccf_this); ajPdbDel(&pdb); continue; } else ajFmtPrintF(errf, "%s%S\n//\n", "naccess output for: ", pdb_name); done_naccess = ajFalse; /* Parse NACCESS output from temp output file a line at a time. */ while(ajReadlineTrim(tempf,&line)) { if(ajStrPrefixC(line,"RES")) { /* Read data from lines. */ if((pcid = line->Ptr[8]) == ' ') ajFmtScanS(line, "%*S %S %S %f %f %f " "%f %f %f %f %f %f %f", &res, &res_num, &f1, &f2, &f3, &f4, &f5, &f6, &f7, &f8, &f9, &f10); else ajFmtScanS(line, "%*S %S %*c %S %f %f " "%f %f %f %f %f %f %f %f", &res, &res_num, &f1, &f2, &f3, &f4, &f5, &f6, &f7, &f8, &f9, &f10); /* Identify the chain, then finding all the residues corresponding to the residue. */ /* Get the chain number from the chain identifier. */ if(!ajPdbChnidToNum(pcid, pdb, &idn)) { ajWarn("Could not convert chain id %c to chain" " number in pdb file %S\n//\n", pcid, pdb_name); ajFmtPrintF(errf, "Could not convert chain id" " %c to chain number in pdb file %S\n//\n", pcid, pdb_name); continue; } /* ** Chain number will start at 1, but we want an index ** into an array which must start at 0, so subtract 1 ** from the chain number to get the index. */ chain_num = idn-1; /* ** Iiterate through the list of residues in the Pdb object, ** temp_res is an AjPResidue used to point to the current ** residue. ** ajBool found switches to true when first residue ** corresponding to the line is found. */ iter = ajListIterNewread(pdb->Chains[chain_num]->Residues); found = ajFalse; while((temp_res = (AjPResidue)ajListIterGet(iter))) { /* If we have found the residue we want, write the residue object. */ if((ajStrMatchS(res_num, temp_res->Pdb) && ajStrMatchS(res, temp_res->Id3))) { found = ajTrue; done_naccess = ajTrue; temp_res->all_abs = f1; temp_res->all_rel = f2; temp_res->side_abs = f3; temp_res->side_rel = f4; temp_res->main_abs = f5; temp_res->main_rel = f6; temp_res->npol_abs = f7; temp_res->npol_rel = f8; temp_res->pol_abs = f9; temp_res->pol_rel = f10; } /* If the matching residues have all been processed. move on to next ASG line, next residue. */ else if(found == ajTrue) break; else /* Matching residues not found yet, move on to next residue. */ continue; } ajListIterDel(&iter); } } if(done_naccess) ajFmtPrintF(errf, "%s%S\n//\n", "naccess data for: ", pdb_name); else { ajFmtPrintF(errf, "%s%S\n//\n", "no naccess data for: ", pdb_name); ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name); nonaccess++; } /* Remove temporary file (naccess output files). */ ajFileClose(&tempf); ajFmtPrintS(&exec, "rm %S", naccess_str); ajSysSystem(exec); ajStrAssignS(&naccess_str, pdbprefix); ajStrAppendS(&naccess_str, pdb->Pdb); ajStrAppendC(&naccess_str, ".asa"); ajFmtPrintS(&exec, "rm %S", naccess_str); ajSysSystem(exec); ajStrAssignS(&naccess_str, pdbprefix); ajStrAppendS(&naccess_str, pdb->Pdb); ajStrAppendC(&naccess_str, ".log"); ajFmtPrintS(&exec, "rm %S", naccess_str); ajSysSystem(exec); } /* Open CCF (output) file. */ ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout); /* Write AjPPdb object to the output file in clean format. */ if(!ajPdbWriteAll(ccf_outf, pdb)) { ajWarn("%s%S\n//\n","Could not write results file for: ", pdb->Pdb); ajFmtPrintF(errf,"%s%S\n//\n","Could not write results file for ", pdb->Pdb); } ajFileClose(&ccf_outf); ajPdbDel(&pdb); ajStrDel(&ccf_this); } /* End of main application loop. */ ajFmtPrint("STRIDE failures: %d\n", nostride); ajFmtPrint("NACCESS failures: %d\n", nonaccess); ajFmtPrintF(errf, "\n\nSTRIDE failures: %d\nNACCESS failures: %d\n", nostride, nonaccess); ajListFree(&ccfin); ajDirDel(&pdbin); ajStrDel(&pdbprefix); ajStrDel(&pdb_name); ajDiroutDel(&ccfout); ajStrDel(&res); ajStrDel(&res_num); ajStrDel(&randomname); ajStrDel(&line); ajStrDel(&naccess_str); ajStrDel(&exec); ajStrDel(&syscmd); ajFileClose(&errf); if(ajStrGetCharFirst(*mode) != '2') ajFileClose(&serrf); if(ajStrGetCharFirst(*mode) != '1') ajFileClose(&nerrf); ajStrDel(&mode[0]); AJFREE(mode); ajExit(); return 0; }
/* @prog domainreso *********************************************************** ** ** Removes low resolution domains from a DCF file (domain ** classification file). ** ******************************************************************************/ int main(ajint argc, char **argv) { AjPList cpdb_path = NULL; /* Location of coordinate files for input */ AjPStr cpdb_name = NULL; /* Name of coordinate file */ AjPStr temp = NULL; /* temp string */ AjPStr temp2 = NULL; /* temp string */ AjPList entry = NULL; /* List of pdb codes with resolution */ /* ABOVE the threshold */ AjPStr *entryarr = NULL; /* entry as an array */ AjPFile fptr_cpdb = NULL; /* Pointer to current coordinate file */ AjPFile dcfin = NULL; /* DCF input file */ AjPFile dcfout = NULL; /* DCF output file */ AjPPdb pdb = NULL; /* Pdb object pointer */ AjPDomain domain = NULL; /* Domain structure */ float threshold = 0.0; /* Resolution threshold */ ajint num = 0; /* number of nodes in list */ ajint type = 0; /* Type of domain (ajSCOP or ajCATH) in the DCF file */ /* Read data from acd */ embInitPV("domainreso",argc,argv,"DOMAINATRIX",VERSION); cpdb_path = ajAcdGetDirlist("cpdbpath"); threshold = ajAcdGetFloat("threshold"); dcfin = ajAcdGetInfile("dcfinfile"); dcfout = ajAcdGetOutfile("dcfoutfile"); /* Allocate strings etc. */ cpdb_name = ajStrNew(); temp = ajStrNew(); /* Create list . */ entry = ajListNew(); /* Create list of files in CPDB directory. */ /* Determine number of nodes on list */ num = ajListGetLength(cpdb_path); /* domainreso reads a directory of clean coordinate files file, creates a list of the files, then reads every list entry and extracts the resolution of the structure. If the value is less than a threshold (user defined) then the domain identifier is pushed onto a list. The DCF file (domain classification file) is then read and domain identifiers compared to those on the list, if found then the domain structure data is written the new DCF file. */ type = ajDomainDCFType(dcfin); /* Start of main application loop */ /* Produce list of pdb codes with resolution */ /* ABOVE the threshold. */ while(ajListPop(cpdb_path,(void **)&temp)) { /* Open coordinate file. */ if((fptr_cpdb=ajFileNewInNameS(temp))==NULL) { ajWarn("Could not open cpdb file"); ajStrDel(&temp); continue; } ajFmtPrint("%S\n", temp); fflush(stdout); /* Read coordinate data file. */ pdb = ajPdbReadFirstModelNew(fptr_cpdb); /* Check if resolution is above threshold. */ if(pdb->Reso > threshold) { /* assign ID to list. */ temp2=ajStrNew(); ajStrAssignS(&temp2, pdb->Pdb); ajListPush(entry, (AjPStr) temp2); } /* Close coordinate file and tidy up*/ ajPdbDel(&pdb); ajFileClose(&fptr_cpdb); ajStrDel(&temp); } num = ajListGetLength(entry); /* Sort the list of pdb codes & convert to an array. */ ajListSort(entry, domainreso_StrComp); ajListToarray(entry, (void ***)&entryarr); /* Read DCF file and compare IDs to those in list if not present then write domain structure data to output. . */ while((domain=(ajDomainReadCNew(dcfin, "*", type)))) { /* DOMAIN id not found in the list of domains with resolution above the threshold, so include it in the output file. */ if((domainreso_StrBinSearchDomain(ajDomainGetId(domain), entryarr, num))==-1) ajDomainWrite(dcfout, domain); /* Delete domain structure. */ ajDomainDel(&domain); } /* Tidy up. */ ajStrDel(&temp2); ajStrDel(&cpdb_name); ajFileClose(&dcfout); ajFileClose(&dcfin); ajListFree(&cpdb_path); ajListFree(&entry); AJFREE(entryarr); /* Return. */ ajExit(); return 0; }
int main(int argc, char **argv) { AjPStr cl = NULL; AjPSeqset seqset = NULL; AjPSeqout seqout = NULL; AjPStr stmp = NULL; AjPStr email = NULL; AjPStr fmt = NULL; AjPStr trtab = NULL; const AjPStr ofn = NULL; AjPStr *applist = NULL; AjBool crc = ajFalse; AjBool alt = ajFalse; AjBool iprlook = ajFalse; AjBool goterms = ajFalse; ajint trlen = 0; ajuint i = 0; ajuint lcnt = 0; AjPStr fn = NULL; AjPFile outf = NULL; embInitPV("eiprscan", argc, argv, "IPRSCAN", VERSION); seqset = ajAcdGetSeqset("sequence"); email = ajAcdGetString("email"); crc = ajAcdGetBoolean("crc"); applist = ajAcdGetList("appl"); fmt = ajAcdGetListSingle("format"); trtab = ajAcdGetListSingle("trtable"); trlen = ajAcdGetInt("trlen"); alt = ajAcdGetBoolean("altjobs"); iprlook = ajAcdGetBoolean("iprlookup"); goterms = ajAcdGetBoolean("goterms"); outf = ajAcdGetOutfile("outfile"); stmp = ajStrNew(); cl = ajStrNewC("iprscan -cli"); fn = ajStrNew(); ajFilenameSetTempname(&fn); seqout = ajSeqoutNew(); if(!ajSeqoutOpenFilename(seqout, fn)) ajFatal("Cannot open temporary file %S",fn); ajSeqoutSetFormatC(seqout, "fasta"); ajSeqoutWriteSet(seqout,seqset); ajSeqoutClose(seqout); ajFmtPrintS(&stmp," -i %S",fn); ajStrAppendS(&cl,stmp); if(!ajSeqsetIsProt(seqset)) ajStrAppendC(&cl," -seqtype n"); if(!crc) ajStrAppendC(&cl," -nocrc"); if(ajStrGetLen(email)) { ajFmtPrintS(&stmp," -email %S",email); ajStrAppendS(&cl,stmp); } i = 0; lcnt = 0; while(applist[i]) { ++lcnt; ++i; } i = 0; while(applist[i]) { if(ajStrMatchC(applist[i],"all")) { if(lcnt != 1) ajFatal("Cannot specify 'all' with multiple " "applications"); ++i; continue; } ajFmtPrintS(&stmp," -appl %S",applist[i]); ajStrAppendS(&cl,stmp); ++i; } ajFmtPrintS(&stmp," -format %S",fmt); ajStrAppendS(&cl,stmp); ajFmtPrintS(&stmp," -trtable %S",trtab); ajStrAppendS(&cl,stmp); ajFmtPrintS(&stmp," -trlen %d",trlen); ajStrAppendS(&cl,stmp); if(alt) ajStrAppendC(&cl," -altjobs"); if(iprlook) ajStrAppendC(&cl," -iprlookup"); if(goterms) ajStrAppendC(&cl," -goterms"); ofn = ajFileGetNameS(outf); ajFmtPrintS(&stmp," -o %S",ofn); ajFileClose(&outf); ajStrAppendS(&cl,stmp); #if 0 ajFmtPrint("%S\n",cl); #endif #if 1 system(ajStrGetPtr(cl)); #endif ajSysFileUnlink(fn); ajStrDel(&cl); ajStrDel(&fn); ajStrDel(&stmp); ajStrDel(&email); ajStrDel(&fmt); ajStrDel(&trtab); ajStrDelarray(&applist); ajSeqoutDel(&seqout); ajSeqsetDel(&seqset); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset = NULL; AjPStr cl = NULL; AjPSeqout seqout = NULL; float thresh = 0.; AjBool netphos = ajFalse; AjPStr format = NULL; AjBool plot = ajFalse; AjPStr fn = NULL; AjPStr stmp = NULL; AjPStr outfname = NULL; embInitPV("eyinoyang", argc, argv, "CBSTOOLS",VERSION); seqset = ajAcdGetSeqset("sequence"); outfname= ajAcdGetOutfileName("outfile"); plot = ajAcdGetBoolean("plot"); netphos = ajAcdGetBoolean("netphos"); thresh = ajAcdGetFloat("threshold"); format = ajAcdGetListSingle("format"); cl = ajStrNewS(ajAcdGetpathC("yinOyang")); fn = ajStrNew(); stmp = ajStrNew(); ajFilenameSetTempname(&fn); seqout = ajSeqoutNew(); if(!ajSeqoutOpenFilename(seqout, fn)) ajFatal("Cannot open temporary file %S",fn); ajSeqoutSetFormatC(seqout, "fasta"); ajSeqoutWriteSet(seqout,seqset); ajSeqoutClose(seqout); if(ajStrMatchC(format,"short")) ajStrAppendC(&cl," -f s"); else if(ajStrMatchC(format,"long")) ajStrAppendC(&cl," -f l"); if(plot) ajStrAppendC(&cl," -g"); if(netphos) ajStrAppendC(&cl," -y"); if(netphos) { ajFmtPrintS(&stmp," -t %f",thresh); ajStrAppendS(&cl,stmp); } ajFmtPrintS(&stmp," %S",fn); ajStrAppendS(&cl,stmp); #if 0 ajFmtPrint("%`S\n",cl); #endif #if 1 ajSysExecOutnameAppendS(cl, outfname); #endif ajSysFileUnlinkS(fn); ajStrDel(&cl); ajStrDel(&fn); ajStrDel(&stmp); ajStrDel(&format); ajSeqoutDel(&seqout); ajSeqsetDel(&seqset); ajStrDel(&outfname); embExit(); return 0; }
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. */ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. */ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. */ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }
int main(int argc, char **argv) { /* ACD data item variables */ AjPSeqset alignfile = NULL; AjPFile prior = NULL; AjPFile null = NULL; AjPFile pam = NULL; float pamwgt = 0.0; AjPStr nhmm = NULL; AjPStr strategy = NULL; ajint pbswitch = 0; float archpri = 0.0; AjBool binary = ajFalse; AjBool fast = ajFalse; float gapmax = 0.0; AjBool hand = ajFalse; float idlevel = 0.0; AjBool noeff = ajFalse; float swentry = 0.0; float swexit = 0.0; AjBool verbosity = ajFalse; AjPStr weighting = NULL; AjPFile hmmfile = NULL; AjPFile o = NULL; AjPFile cfile = NULL; /* Housekeeping variables */ AjPStr cmd = NULL; AjPStr rnd1 = NULL; AjPStr rnd2 = NULL; AjPStr fmt = NULL; char option; AjBool fmtok = ajFalse; AjPStr hmmfilename = NULL; /* ACD file processing */ embInitPV("ehmmbuild",argc,argv,"HMMERNEW",VERSION); alignfile = ajAcdGetSeqset("alignfile"); prior = ajAcdGetInfile("prior"); null = ajAcdGetInfile("null"); pam = ajAcdGetInfile("pam"); pamwgt = ajAcdGetFloat("pamwgt"); nhmm = ajAcdGetString("nhmm"); strategy = ajAcdGetListSingle("strategy"); pbswitch = ajAcdGetInt("pbswitch"); archpri = ajAcdGetFloat("archpri"); binary = ajAcdGetBoolean("binary"); fast = ajAcdGetBoolean("fast"); gapmax = ajAcdGetFloat("gapmax"); hand = ajAcdGetBoolean("hand"); idlevel = ajAcdGetFloat("sidlevel"); noeff = ajAcdGetBoolean("noeff"); swentry = ajAcdGetFloat("swentry"); swexit = ajAcdGetFloat("swexit"); verbosity = ajAcdGetBoolean("verbosity"); weighting = ajAcdGetListSingle("weighting"); hmmfile = ajAcdGetOutfile("hmmfile"); o = ajAcdGetOutfile("o"); cfile = ajAcdGetOutfile("cfile"); /* MAIN APPLICATION CODE */ /* 1. Housekeeping */ cmd = ajStrNew(); rnd1 = ajStrNew(); rnd2 = ajStrNew(); fmt = ajStrNew(); hmmfilename = ajStrNew(); ajStrAssignC(&hmmfilename, ajFileGetNameC(hmmfile)); /* 2. Ensure alignfile is in format HMMER can understand. These include FASTA, GENBANK,EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL and PHYLIP. EMBOSS name definitions are taken from seqInFormatDef in ajseqread.c and seqOutFormat in ajseqwrite.c */ fmtok=ajFalse; ajStrAssignS(&fmt, ajSeqsetGetFormat(alignfile)); if(ajStrMatchC(fmt, "fasta") || ajStrMatchC(fmt, "genbank") || ajStrMatchC(fmt, "embl") || ajStrMatchC(fmt, "gcg") || ajStrMatchC(fmt, "pir") || ajStrMatchC(fmt, "stockholm")|| ajStrMatchC(fmt, "selex") || ajStrMatchC(fmt, "msf") || ajStrMatchC(fmt, "clustal") || ajStrMatchC(fmt, "phylip")) fmtok = ajTrue; /* This could be replaced with code to reformat the file. */ if(!fmtok) ajFatal("Input alignment ('alignfile' ACD option) is not in format " "HMMER understands. Please use a a file in FASTA, GENBANK, " "EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL or PHYLIP format."); /* 3. Build hmmbuild command line */ /* Command line is built in this order: i. Application name. ii. HMMER 'options' (in order they appear in ACD file) iii.HMMER 'options' (that don't appear in ACD file) iv. HMMER & new parameters. */ ajStrAssignS(&cmd, ajAcdGetpathC("hmmbuild")); if(prior) ajFmtPrintAppS(&cmd, " --prior %s ", ajFileGetNameC(prior)); if(null) ajFmtPrintS(&cmd, " --null %s ", ajFileGetNameC(null)); if(pam) ajFmtPrintAppS(&cmd, " --pam %s --pamwgt %f ", ajFileGetNameC(pam), pamwgt); ajFmtPrintAppS(&cmd, " -n %S ", nhmm); /* ACD option only allows one selection */ option = ajStrGetCharFirst(strategy); if(option == 'F') ajStrAppendC(&cmd, " -f "); else if(option == 'G') ajStrAppendC(&cmd, " -g "); else if(option == 'S') ajStrAppendC(&cmd, " -s "); /* else go with default ('D' option in ACD file) */ ajFmtPrintAppS(&cmd, " --pbswitch %d ", pbswitch); ajFmtPrintAppS(&cmd, " --archpri %f ", archpri); if(binary) ajStrAppendC(&cmd, " --binary "); if(fast) ajFmtPrintAppS(&cmd, " --fast --gapmax %f ", gapmax); if(hand) ajStrAppendC(&cmd, " --hand "); ajFmtPrintAppS(&cmd, " --idlevel %f ", idlevel); if(noeff) ajStrAppendC(&cmd, " --noeff "); ajFmtPrintAppS(&cmd, " --swentry %f ", swentry); ajFmtPrintAppS(&cmd, " --swexit %f ", swexit); if(verbosity) ajStrAppendC(&cmd, " --verbose "); /* ACD option only allows one selection */ option = ajStrGetCharFirst(weighting); if(option == 'B') ajStrAppendC(&cmd, " --wblosum "); else if(option == 'G') ajStrAppendC(&cmd, " --wgsc "); else if(option == 'K') ajStrAppendC(&cmd, " --wme "); else if(option == 'W') ajStrAppendC(&cmd, " --wpb "); else if(option == 'V') ajStrAppendC(&cmd, " --wvoronoi "); else if(option == 'N') ajStrAppendC(&cmd, " --wnone "); if(o) ajFmtPrintAppS(&cmd, " -o %s ", ajFileGetNameC(o)); if(cfile) ajFmtPrintAppS(&cmd, " --cfile %s ", ajFileGetNameC(cfile)); /* -A (append) always set but file will be wiped by EMBOSS first unless append: "Y" is set for "hmmfile" in the ACD file. */ ajStrAppendC(&cmd, " -A -F "); ajFmtPrintAppS(&cmd, " %S %S", hmmfilename, ajSeqsetGetFilename(alignfile)); /* 4. Close ACD files */ ajSeqsetDel(&alignfile); ajFileClose(&prior); ajFileClose(&null); ajFileClose(&pam); ajFileClose(&hmmfile); ajFileClose(&o); ajFileClose(&cfile); /* 5. Call hmmbuild */ ajFmtPrint("\n%S\n", cmd); ajSysExecS(cmd); /* 6. Exit cleanly */ ajStrDel(&nhmm); ajStrDel(&cmd); ajStrDel(&rnd1); ajStrDel(&rnd2); ajStrDel(&fmt); ajStrDel(&hmmfilename); ajStrDel(&strategy); ajStrDel(&weighting); embExit(); return 0; }