/* gFormatGenbank
**
** Appends a GenBank-style text rendering of a sequence to *inseq.
**
** The sequence is written in "genbank" format to a uniquely named temporary
** file, then its feature table is written to a file of the same name.  The
** two texts are merged: every line of the sequence output is appended to
** *inseq, and the whole feature output is inserted immediately before the
** line that reads exactly "ORIGIN\n".  Each temporary file is unlinked as
** soon as it has been opened for reading.
**
** seq    [AjPSeq]   Sequence to format.
** inseq  [AjPStr*]  String the formatted text is appended to.
**
** Returns ajTrue on success; ajFalse when the sequence has no feature table
** or one of the temporary files cannot be opened.  Calls embExitBad() if
** the sequence output file cannot be opened.
*/
AjBool gFormatGenbank(AjPSeq seq, AjPStr *inseq)
{
    AjPSeqout     seqout   = NULL;
    AjPFeattabOut featout  = NULL;
    AjPFeattable  feat     = NULL;
    AjPStr        seqline  = NULL;
    AjPStr        featline = NULL;
    AjPFile       seqfile  = NULL;
    AjPFile       featfile = NULL;
    AjPStr        filename = NULL;
    AjPStr        format   = NULL;

    gAssignUniqueName(&filename);

    feat = ajSeqGetFeatCopy(seq);

    if(!feat)
    {
        ajStrDel(&filename);
        return ajFalse;
    }

    /* Pass 1: the sequence itself */
    seqout = ajSeqoutNew();

    if(!ajSeqoutOpenFilename(seqout, filename))
        embExitBad();

    /* Keep a handle on the format name so it can be released afterwards */
    format = ajStrNewC("genbank");
    ajSeqoutSetFormatS(seqout, format);
    ajStrDel(&format);

    ajSeqoutWriteSeq(seqout, seq);
    ajSeqoutClose(seqout);
    ajSeqoutDel(&seqout);

    seqfile = ajFileNewInNameS(filename);
    ajSysFileUnlinkS(filename);

    if(!seqfile)
    {
        ajFeattableDel(&feat);
        ajStrDel(&filename);
        return ajFalse;
    }

    /* Pass 2: the feature table, reusing the same file name */
    featout = ajFeattabOutNew();

    if(!ajFeattabOutOpen(featout, filename))
    {
        ajFeattableDel(&feat);
        ajFileClose(&seqfile);
        ajStrDel(&filename);
        return ajFalse;
    }

    ajFeattableWriteGenbank(featout, feat);
    ajFeattableDel(&feat);

    /* NOTE(review): the original disables ajFeattabOutDel(&featout) and
    ** only closes the underlying handle; the reason is not visible here,
    ** so that is preserved and the featout object itself is not released.
    */
    ajFileClose(&(featout->Handle));

    featfile = ajFileNewInNameS(filename);
    ajSysFileUnlinkS(filename);

    if(!featfile)
    {
        ajFileClose(&seqfile);
        ajStrDel(&filename);
        return ajFalse;
    }

    /* Merge: features go in just ahead of the ORIGIN line */
    while(ajReadline(seqfile, &seqline))
    {
        if(ajStrMatchC(seqline, "ORIGIN\n"))
        {
            while(ajReadline(featfile, &featline))
                ajStrAppendS(inseq, featline);
        }

        ajStrAppendS(inseq, seqline);
    }

    ajStrDel(&seqline);
    ajStrDel(&featline);
    ajStrDel(&filename);

    ajFileClose(&seqfile);
    ajFileClose(&featfile);

    return ajTrue;
}
/* remap_read_file_of_enzyme_names
**
** If *enzymes starts with '@', the remainder is taken as the name of a
** file of enzyme names.  *enzymes is then replaced by the names read from
** that file, each followed by a comma.  Empty lines and lines starting
** with '#' or '!' are skipped.  A string that does not start with '@' is
** left untouched.
**
** enzymes [AjPStr*]  Enzyme name list or "@filename"; updated in place.
**
** Calls ajFatal if the named file cannot be opened.
*/
static void remap_read_file_of_enzyme_names(AjPStr *enzymes)
{
    AjPFile file = NULL;
    AjPStr line = NULL;
    const char *p = NULL;

    if(ajStrFindC(*enzymes, "@") != 0)
        return;

    ajStrTrimC(enzymes, "@");	/* remove the @ */
    file = ajFileNewInNameS(*enzymes);

    /* %S expects the string object itself, not the address of the handle */
    if(file == NULL)
        ajFatal("Cannot open the file of enzyme names: '%S'", *enzymes);

    /* blank off the enzyme file name and replace with the enzyme names */
    ajStrSetClear(enzymes);
    line = ajStrNew();

    while(ajReadlineTrim(file, &line))
    {
        p = ajStrGetPtr(line);

        if(!*p || *p == '#' || *p == '!')
            continue;

        ajStrAppendS(enzymes, line);
        ajStrAppendC(enzymes, ",");
    }

    ajStrDel(&line);
    ajFileClose(&file);

    return;
}
/* jaspscan_readmatrix
**
** Reads a matrix file into a newly allocated array of four float rows.
** The column count is taken from the first line (tokens separated by
** spaces) and every row is parsed with that same column count.
**
** Only four row pointers are allocated, so reading stops after four lines;
** any further lines (for example a trailing blank line) are ignored rather
** than being written past the end of the row array.
**
** mfname [const AjPStr]  Matrix file name.
** matrix [float***]      Receives the allocated row array.
**
** Returns the number of columns found on the first line (0 if the file is
** empty).  Calls ajFatal if the file cannot be opened.
*/
static ajuint jaspscan_readmatrix(const AjPStr mfname, float ***matrix)
{
    enum { JASPSCAN_MATRIX_ROWS = 4 };

    AjPFile inf  = NULL;
    AjPStr  line = NULL;
    ajuint  i    = 0;
    ajuint  cols = 0;

    AJCNEW0(*matrix,JASPSCAN_MATRIX_ROWS);

    line = ajStrNew();

    inf = ajFileNewInNameS(mfname);
    if(!inf)
        ajFatal("Cannot open matrix file %S",mfname);

    i = 0;

    /* Bound the row index before reading so (*matrix)[i] stays in range */
    while(i < JASPSCAN_MATRIX_ROWS && ajReadlineTrim(inf,&line))
    {
        if(!i)
            cols = ajStrParseCountC(line," \n");

        (*matrix)[i++] = ajArrFloatLine(line," \n",1,cols);
    }

    ajStrDel(&line);
    ajFileClose(&inf);

    return cols;
}
/* jaspextract_readmatrixlist
**
** Loads the matrix list file (directory + MATRIXFILE) into a table.
** For each line whose first token can be scanned, that token becomes the
** key and a copy of the complete line becomes the value.  Lines with no
** scannable token are skipped.
**
** mtable    [AjPTable]      Table receiving key/line pairs.
** directory [const AjPStr]  Directory prefix for the list file.
**
** Calls ajFatal when the EMBOSS data directory cannot be determined, when
** the list file is not readable, or when it cannot be opened.
*/
static void jaspextract_readmatrixlist(AjPTable mtable,
                                       const AjPStr directory)
{
    const AjPStr embossdata = NULL;
    AjPStr listname = NULL;
    AjPFile listfile = NULL;
    AjPStr record = NULL;
    AjPStr id = NULL;
    AjPStr copy = NULL;

    listname = ajStrNew();

    /* The data path is only probed for existence; it is not used below */
    embossdata = ajDatafileValuePath();

    if(!embossdata)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");

    ajFmtPrintS(&listname,"%S%s",directory,MATRIXFILE);

    if(!ajFilenameExistsRead(listname))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found",directory);

    listfile = ajFileNewInNameS(listname);

    if(!listfile)
        ajFatal("Cannot open input file: %S",listname);

    while(ajReadline(listfile,&record))
    {
        id = ajStrNew();

        if(ajFmtScanS(record,"%S",&id) == 1)
        {
            /* Table takes the key and a private copy of the whole line */
            copy = ajStrNew();
            ajStrAssignS(&copy,record);
            ajTablePut(mtable,(void *)id, (void *)copy);
        }
        else
        {
            ajStrDel(&id);
        }
    }

    ajFileClose(&listfile);
    ajStrDel(&listname);
    ajStrDel(&record);

    return;
}
/* gGetFileContent
**
** Appends the complete contents of a file to *content, then deletes the
** file.
**
** content  [AjPStr*]  String the file's lines are appended to.
** filename [AjPStr]   File to read; it is unlinked after a successful read.
**
** Returns ajFalse if the file cannot be opened (nothing is unlinked in
** that case), ajTrue otherwise.
*/
AjBool gGetFileContent(AjPStr* content, AjPStr filename)
{
    AjPFile file = NULL;
    AjPStr  line = NULL;

    file = ajFileNewInNameS(filename);

    if(file == NULL)
        return ajFalse;

    while(ajReadline(file, &line))
        ajStrAppendS(content, line);

    /* The read buffer belongs to this function; release it */
    ajStrDel(&line);
    ajFileClose(&file);

    ajSysFileUnlinkS(filename);

    return ajTrue;
}
/* dbiblast_memfopenfile
**
** Opens a named file for reading and wraps it in a new PMemFile record.
** The record is marked as file-backed: IsMem is 0, Size is 0 and Mem is
** NULL, with File holding the opened handle and Name a copy of the name.
**
** name [const AjPStr]  File name to open.
**
** Returns the new record, or NULL if the file cannot be opened.
*/
static PMemFile dbiblast_memfopenfile(const AjPStr name)
{
    PMemFile memfile;
    AjPFile handle;

    handle = ajFileNewInNameS(name);

    if(handle == NULL)
        return NULL;

    AJNEW0(memfile);

    ajStrAssignS(&memfile->Name, name);
    memfile->IsMem = 0;
    memfile->File  = handle;
    memfile->Size  = 0;
    memfile->Mem   = NULL;

    ajDebug("fopened '%S'\n", name);

    return memfile;
}
/* notseq_readfile
**
** Builds the exclusion pattern.  When 'exclude' does not begin with '@'
** it is copied to *pattern unchanged.  Otherwise the text after the '@'
** names a file; *pattern is cleared and filled with every line of that
** file, each followed by a comma.  Empty lines and lines beginning with
** '#' or '!' are ignored.
**
** exclude [const AjPStr]  Pattern, or "@filename".
** pattern [AjPStr*]       Receives the resulting pattern.
**
** Calls ajFatal if the named file cannot be opened.
*/
static void notseq_readfile(const AjPStr exclude, AjPStr *pattern)
{
    AjPFile listfile = NULL;
    AjPStr entry = NULL;
    AjPStr listname = NULL;
    const char *text = NULL;

    /* Plain pattern: nothing to read */
    if(ajStrFindC(exclude, "@") != 0)
    {
        ajStrAssignS(pattern, exclude);
        return;
    }

    ajStrAssignS(&listname, exclude);
    ajStrTrimC(&listname, "@");	/* remove the @ */

    listfile = ajFileNewInNameS(listname);

    if(listfile == NULL)
        ajFatal("Cannot open the file of sequence names: '%S'", listname);

    /* blank off the file name and replace with the sequence names */
    ajStrSetClear(pattern);
    entry = ajStrNew();

    while(ajReadlineTrim(listfile, &entry))
    {
        text = ajStrGetPtr(entry);

        if(*text && *text != '#' && *text != '!')
        {
            ajStrAppendS(pattern, entry);
            ajStrAppendC(pattern, ",");
        }
    }

    ajStrDel(&entry);
    ajStrDel(&listname);
    ajFileClose(&listfile);

    return;
}
/* gHttpPostFileSS
**
** Sends the contents of a file to a URL as a multipart/form-data POST
** (form field "file", boundary "xYzZY") and returns the buffered reply.
** The request is re-issued while ajHttpRedirect reports a redirect.
**
** url      [AjPStr]  Target URL; split into host, port and path.
** filename [AjPStr]  File whose contents are posted; also sent as the
**                    multipart "filename" attribute.
**
** Returns a file buffer loaded with the response, or NULL if the input
** file or the connection cannot be opened.
*/
AjPFilebuff gHttpPostFileSS(AjPStr url, AjPStr filename)
{
    AjPFilebuff buff = NULL;
    AjPFile file = NULL;
    AjPStr line = NULL;
    AjPStr cont = NULL;
    AjPStr host = NULL;
    AjPStr path = NULL;
    AjPStr post = NULL;
    AjPStr body = NULL;
    ajint port = 80;
    ajuint http = 0;
    FILE *fp = NULL;

    AjOSysSocket sock;
    AjOSysTimeout timo;

    /* Nothing to post if the file cannot be read */
    file = ajFileNewInNameS(filename);

    if(!file)
        return NULL;

    post = ajStrNew();
    body = ajStrNew();
    cont = ajStrNew();

    while(ajReadline(file, &line))
        ajStrAppendS(&cont, line);

    ajStrDel(&line);
    ajFileClose(&file);

    ajHttpUrlDeconstruct(url, &port, &host, &path);

    /* NOTE(review): when a redirect is followed, the previous buffer is
    ** overwritten without being deleted, as in the original - confirm
    ** whether it should be released first.
    */
    while(buff==NULL || ajHttpRedirect(buff, &host, &port, &path, &http))
    {
        if(ajStrGetCharFirst(path) != '/')
            ajStrInsertK(&path, 0, '/');

        ajFmtPrintS(
            &body,
            "--xYzZY\015\012"
            "Content-Disposition: form-data; name=\"file\";"
            " filename=\"%S\"\015\012"
            "Content-Type: text/plain\015\012"
            "%S\015\012"
            "\015\012--xYzZY--\015\012",
            filename, cont
        );

        ajFmtPrintS(
            &post,
            "POST http://%S%S\n"
            "Content-Length: %d\n"
            "Content-Type: multipart/form-data; boundary=xYzZY\n\n"
            "%S",
            host, path,
            ajStrGetLen(body), body
        );

        /* Echoes the request to standard output (kept from the original;
        ** looks like diagnostic output - TODO confirm it is wanted) */
        ajFmtPrint("%S", post);

        fp = ajHttpOpen(NULL, host, port, post, &sock);

        buff = fp ? ajFilebuffNewFromCfile(fp) : NULL;

        if(!buff)
            break;
    }

    /* Request strings are no longer needed on either outcome */
    ajStrDel(&post);
    ajStrDel(&body);
    ajStrDel(&cont);
    ajStrDel(&host);
    ajStrDel(&path);

    if(!buff)
        return NULL;

    /* Load the whole response under a 180 second timeout */
    timo.seconds = 180;
    ajSysTimeoutSet(&timo);
    ajFilebuffLoadAll(buff);
    ajSysTimeoutUnset(&timo);

    return buff;
}
int main(int argc, char **argv) { AjPList sigin = NULL; /* Signature input file names. */ AjPStr signame = NULL; /* Name of signature file. */ AjPFile sigf = NULL; /* Signature input file. */ EmbPSignature sig = NULL; /* Signature. */ AjPList siglist = NULL; /* List of signatures. */ AjIList sigiter = NULL; /* Iterator for siglist. */ AjBool sigok = ajFalse; /* True if signature processed ok. */ EmbPHit hit = NULL; /* Hit to store signature-sequence match. */ AjPList hits = NULL; /* List of hits */ AjPList ligands = NULL; /* List of top-scoring ligands. */ AjPSeqall database=NULL; /* Protein sequences to match signature against. */ AjPSeq seq = NULL; /* Current sequence. */ AjPMatrixf sub =NULL; /* Residue substitution matrix. */ float gapo =0.0; /* Gap insertion penalty. */ float gape =0.0; /* Gap extension penalty. */ AjPStr nterm=NULL; /* Holds N-terminal matching options from acd. */ ajint ntermi=0; /* N-terminal option as int. */ AjPFile hitsf =NULL; /* Hits output file. sequence matches. */ AjPDirout hitsdir=NULL; /* Directory of hits files (output). */ AjPFile alignf =NULL; /* Alignment output file. */ AjPDirout aligndir=NULL; /* Directory of alignment files (output). */ AjPFile resultsf =NULL; /* Results file (output). */ AjPDirout resultsdir=NULL; /* Directory of results files (output). */ AjPStr mode = NULL; /* Mode, 1: Patch score mode, 2: Site score mode. */ ajint modei = 0; /* Selected mode as integer. */ SigPLighit lighit = NULL; embInitPV("sigscanlig", argc, argv, "SIGNATURE",VERSION); /* GET VALUES FROM ACD */ sigin = ajAcdGetDirlist("siginfilesdir"); database = ajAcdGetSeqall("dbseqall"); sub = ajAcdGetMatrixf("sub"); gapo = ajAcdGetFloat("gapo"); gape = ajAcdGetFloat("gape"); nterm = ajAcdGetListSingle("nterm"); hitsdir = ajAcdGetOutdir("hitsoutdir"); aligndir = ajAcdGetOutdir("alignoutdir"); resultsdir = ajAcdGetOutdir("resultsoutdir"); mode = ajAcdGetListSingle("mode"); /*Assign N-terminal matching option etc. 
*/ ajFmtScanS(nterm, "%d", &ntermi); modei = (ajint) ajStrGetCharFirst(mode)-48; /* READ & PROCESS SIGNATURES */ siglist = ajListNew(); while(ajListPop(sigin, (void **) &signame)) { /* Read signature files, compile signatures and populate list. */ sigok = ajFalse; if((sigf = ajFileNewInNameS(signame))) if((sig = embSignatureReadNew(sigf))) if(embSignatureCompile(&sig, gapo, gape, sub)) { sigok=ajTrue; ajListPushAppend(siglist, sig); /* ajFmtPrint("Id: %S\nDomid: %S\nLigid: %S\nns: %d\n" "sn: %d\nnp: %d\npn: %d\nminpatch: %d\n" "maxgap: %d\n", sig->Id, sig->Domid, sig->Ligid, sig->ns, sig->sn, sig->np, sig->pn, sig->minpatch, sig->maxgap); */ } if(!sigok) { ajWarn("Could not process %S", signame); embSignatureDel(&sig); ajFileClose(&sigf); ajStrDel(&signame); continue; } ajFileClose(&sigf); ajStrDel(&signame); } ajListFree(&sigin); /* ALIGN EACH QUERY SEQUENCE TO LIST OF SIGNATURE */ while(ajSeqallNext(database, &seq)) { /* Do sequence-signature alignment and save results */ hits = ajListNew(); sigiter = ajListIterNew(siglist); while((sig = (EmbPSignature) ajListIterGet(sigiter))) { if(embSignatureAlignSeq(sig, seq, &hit, ntermi)) { hit->Sig = sig; ajListPushAppend(hits, hit); hit=NULL; /* To force reallocation by embSignatureAlignSeq */ } /* There has to be a hit for each signature for correct generation of the LHF by sigscanlig_WriteFasta. So push an empty hit if necessary. 'hit'=NULL forces reallocation by embSignatureAlignSeq. 
*/ /* else { hit = embHitNew(); ajListPushAppend(hits, hit); hit=NULL; } */ } ajListIterDel(&sigiter); /* Rank-order the list of hits by score */ ajListSort(hits, embMatchinvScore); /* Write ligand hits & alignment files (output) */ hitsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), hitsdir); alignf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), aligndir); resultsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), resultsdir); /* if((!sigscanlig_WriteFasta(hitsf, siglist, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); */ if((!sigscanlig_WriteFasta(hitsf, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); if((!sigscanlig_SignatureAlignWriteBlock(alignf, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); /* if((!sigscanlig_SignatureAlignWriteBlock(alignf, siglist, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); */ /* Sort list of hits by ligand type and site number. Process list of ligands and print out. */ ajListSortTwo(hits, embMatchLigid, embMatchSN); if(modei==1) ligands = sigscanlig_score_ligands_patch(hits); else if(modei==2) ligands = sigscanlig_score_ligands_site(hits); else ajFatal("Unrecognised mode"); sigscanlig_WriteResults(ligands, resultsf); ajFileClose(&hitsf); ajFileClose(&alignf); ajFileClose(&resultsf); /* Memory management */ while(ajListPop(hits, (void **) &hit)) embHitDel(&hit); ajListFree(&hits); while(ajListPop(ligands, (void **) &lighit)) sigscanlig_LigHitDel(&lighit); ajListFree(&ligands); } /* MEMORY MANAGEMENT */ while(ajListPop(siglist, (void **) &sig)) embSignatureDel(&sig); ajListFree(&siglist); ajSeqallDel(&database); ajMatrixfDel(&sub); ajStrDel(&nterm); ajDiroutDel(&hitsdir); ajDiroutDel(&aligndir); ajDiroutDel(&resultsdir); ajStrDel(&mode); embExit(); return 0; }
int main(int argc, char **argv) { embInitPV("kmafft", argc, argv, "KBWS", "1.0.8"); struct soap soap; struct ns1__mafftInputParams params; char* jobid; char* result; AjPSeqall seqall; AjPSeq seq; AjPFile outf; AjPStr substr; AjPStr inseq = NULL; AjPStr strategy; AjPStr outorder; float op; float ep; AjPStr scorematrix; AjBool homologs; AjBool showhomologs; float threshold; AjPStr referenceseq; AjPStr harrplot; strategy = ajAcdGetString("strategy"); outorder = ajAcdGetString("outorder"); op = ajAcdGetFloat("op"); ep = ajAcdGetFloat("ep"); scorematrix = ajAcdGetString("scorematrix"); homologs = ajAcdGetBoolean("homologs"); showhomologs = ajAcdGetBoolean("showhomologs"); threshold = ajAcdGetFloat("threshold"); referenceseq = ajAcdGetString("referenceseq"); harrplot = ajAcdGetString("harrplot"); seqall = ajAcdGetSeqall("seqall"); outf = ajAcdGetOutfile("outfile"); params.strategy = ajCharNewS(strategy); params.outorder = ajCharNewS(outorder); params.op = op; params.ep = ep; params.scorematrix = ajCharNewS(scorematrix); if (homologs) { params.homologs = xsd__boolean__true_; } else { params.homologs = xsd__boolean__false_; } if (showhomologs) { params.showhomologs = xsd__boolean__true_; } else { params.showhomologs = xsd__boolean__false_; } params.threshold = threshold; params.referenceseq = ajCharNewS(referenceseq); params.harrplot = ajCharNewS(harrplot); AjPStr tmp = NULL; AjPStr tmpFileName = NULL; AjPSeqout fil_file; AjPStr line = NULL; /* if "AjPStr line; -> ajReadline is not success!" 
*/ AjPStr sizestr = NULL; ajint thissize; ajint nb = 0; AjBool are_prot = ajFalse; ajint size = 0; AjPFile infile; tmp = ajStrNewC("fasta"); fil_file = ajSeqoutNew(); tmpFileName = getUniqueFileName(); if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) { embExitBad(); } ajSeqoutSetFormatS(fil_file, tmp); while (ajSeqallNext(seqall, &seq)) { if (!nb) { are_prot = ajSeqIsProt(seq); } ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); ajSeqoutDel(&fil_file); if (nb < 2) { ajFatal("Multiple alignments need at least two sequences"); } infile = ajFileNewInNameS(tmpFileName); while (ajReadline(infile, &line)) { ajStrAppendS(&inseq,line); ajStrAppendC(&inseq,"\n"); } soap_init(&soap); char* in0; in0 = ajCharNewS(inseq); if ( soap_call_ns1__runMafft( &soap, NULL, NULL, in0, ¶ms, &jobid ) == SOAP_OK ) { fprintf(stderr,"Jobid: %s\n",jobid); } else { soap_print_fault(&soap, stderr); } int check = 0; while ( check == 0 ) { if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid, &check ) == SOAP_OK ) { fprintf(stderr,"*"); } else { soap_print_fault(&soap, stderr); } sleep(3); } fprintf(stderr,"\n"); if ( soap_call_ns1__getResult( &soap, NULL, NULL, jobid, &result ) == SOAP_OK ) { substr = ajStrNewC(result); ajFmtPrintF(outf,"%S\n",substr); } else { soap_print_fault(&soap, stderr); } ajSysFileUnlinkS(tmpFileName); soap_destroy(&soap); soap_end(&soap); soap_done(&soap); ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); embExit(); return 0; }
int main(int argc, char **argv) { // initialize EMBASSY info embInitPV("kweblogo", argc, argv, "KBWS", "1.0.9"); // soap driver and parameter object struct soap soap; struct ns1__weblogoInputParams params; char* jobid; AjPSeqall seqall; AjPSeq seq; AjPStr substr; AjPStr inseq = NULL; // get input sequence seqall= ajAcdGetSeqall("seqall"); // get/set parameters params.format = ajCharNewS(ajAcdGetString("format")); AjPStr tmp= NULL; AjPStr tmpFileName= NULL; AjPSeqout fil_file; AjPStr line= NULL; /* if "AjPStr line; -> ajReadline is not success!" */ AjPStr sizestr= NULL; ajint thissize; ajint nb= 0; AjBool are_prot= ajFalse; ajint size= 0; AjPFile infile; AjPFile goutf; AjPStr goutfile; goutfile= ajAcdGetString("goutfile"); tmp= ajStrNewC("fasta"); fil_file= ajSeqoutNew(); tmpFileName= getUniqueFileName(); if(!ajSeqoutOpenFilename(fil_file, tmpFileName)) { embExitBad(); } ajSeqoutSetFormatS(fil_file, tmp); while (ajSeqallNext(seqall, &seq)) { if (!nb) { are_prot = ajSeqIsProt(seq); } ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); ajSeqoutDel(&fil_file); if (nb < 2) { ajFatal("Multiple alignments need at least two sequences"); } infile = ajFileNewInNameS(tmpFileName); while (ajReadline(infile, &line)) { ajStrAppendS(&inseq,line); ajStrAppendC(&inseq,"\n"); } soap_init(&soap); char* in0; in0= ajCharNewS(inseq); if (soap_call_ns1__runWeblogo( &soap, NULL, NULL, in0, ¶ms, &jobid) == SOAP_OK) { } else { soap_print_fault(&soap, stderr); } int check= 0; while (check == 0 ) { if (soap_call_ns1__checkStatus(&soap, NULL, NULL, jobid, &check) == SOAP_OK) { } else { soap_print_fault(&soap, stderr); } sleep(3); } char* image_url; if (soap_call_ns1__getResult(&soap, NULL, NULL, jobid, &image_url) == SOAP_OK) { goutf= ajFileNewOutNameS(goutfile); if (!goutf) { // can not open image output file ajFmtError("Problem writing out image file"); embExitBad(); } if (!gHttpGetBinC(image_url, &goutf)) { // can not download image file ajFmtError("Problem downloading image 
file"); embExitBad(); } } else { soap_print_fault(&soap, stderr); } // delete temporary multi-fasta sequence file ajSysFileUnlinkS(tmpFileName); // destruct SOAP driver soap_destroy(&soap); soap_end(&soap); soap_done(&soap); // destruct EMBOSS object ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); // exit embExit(); return 0; }
int main(int argc, char **argv) { EmbPBtreeEntry entry = NULL; AjPStr dbname = NULL; AjPStr dbrs = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjBool statistics; AjBool compressed; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr dbtype = NULL; AjPFile outf = NULL; AjPStr *fieldarray = NULL; ajint nfields; ajint nfiles; AjPStr tmpstr = NULL; AjPStr thysfile = NULL; ajint i; AjPFile inf = NULL; AjPStr word = NULL; AjPBtId idobj = NULL; AjPBtPri priobj = NULL; AjPBtHybrid hyb = NULL; ajulong nentries = 0L; ajulong ientries = 0L; AjPTime starttime = NULL; AjPTime begintime = NULL; AjPTime nowtime = NULL; ajlong startclock = 0; ajlong beginclock = 0; ajlong nowclock = 0; ajulong idcache=0L, idread = 0L, idwrite = 0L, idsize= 0L; ajulong accache=0L, acread = 0L, acwrite = 0L, acsize= 0L; ajulong svcache=0L, svread = 0L, svwrite = 0L, svsize= 0L; ajulong kwcache=0L, kwread = 0L, kwwrite = 0L, kwsize= 0L; ajulong decache=0L, deread = 0L, dewrite = 0L, desize= 0L; ajulong txcache=0L, txread = 0L, txwrite = 0L, txsize= 0L; double tdiff = 0.0; ajint days = 0; ajint hours = 0; ajint mins = 0; embInit("dbxflat", argc, argv); dbtype = ajAcdGetListSingle("idformat"); fieldarray = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); outf = ajAcdGetOutfile("outfile"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); dbrs = ajAcdGetString("dbresource"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); statistics = ajAcdGetBoolean("statistics"); compressed = ajAcdGetBoolean("compressed"); entry = embBtreeEntryNew(); if(compressed) embBtreeEntrySetCompressed(entry); tmpstr = ajStrNew(); idobj = ajBtreeIdNew(); priobj = ajBtreePriNew(); hyb = ajBtreeHybNew(); nfields = embBtreeSetFields(entry,fieldarray); embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory, indexdir); for(i=0; i< 
nfields; i++) { if(ajStrMatchC(fieldarray[i], "acc")) { accfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(accfield); } else if(ajStrMatchC(fieldarray[i], "sv")) { svfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(svfield); } else if(ajStrMatchC(fieldarray[i], "des")) { desfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(desfield); } else if(ajStrMatchC(fieldarray[i], "key")) { keyfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(keyfield); } else if(ajStrMatchC(fieldarray[i], "org")) { orgfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(orgfield); } else if(!ajStrMatchC(fieldarray[i], "id")) ajErr("Unknown field '%S' specified for indexing", fieldarray[i]); } embBtreeGetRsInfo(entry); nfiles = embBtreeGetFiles(entry,directory,filename,exclude); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embBtreeWriteEntryFile(entry); embBtreeOpenCaches(entry); starttime = ajTimeNewToday(); ajFmtPrintF(outf, "Processing directory: %S\n", directory); for(i=0;i<nfiles;++i) { begintime = ajTimeNewToday(); beginclock = ajClockNow(); ajListPop(entry->files,(void **)&thysfile); ajListPushAppend(entry->files,(void *)thysfile); ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile); if(!(inf=ajFileNewInNameS(tmpstr))) ajFatal("Cannot open input file %S\n",tmpstr); ajFilenameTrimPath(&tmpstr); ajFmtPrintF(outf,"Processing file: %S",tmpstr); ientries = 0L; while(dbxflat_NextEntry(entry,inf)) { ++ientries; if(entry->do_id) { if(ajStrGetLen(entry->id) > entry->idlen) { if(ajStrGetLen(entry->id) > maxidlen) { ajWarn("id '%S' too long, truncating to idlen %d", entry->id, entry->idlen); maxidlen = ajStrGetLen(entry->id); } idtrunc++; ajStrKeepRange(&entry->id,0,entry->idlen-1); } ajStrFmtLower(&entry->id); ajStrAssignS(&hyb->key1,entry->id); 
hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->idcache,hyb); ++idtot; } if(accfield) { while(ajListPop(accfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(accfield->cache,hyb); ++acctot; ajStrDel(&word); } } if(svfield) { while(ajListPop(svfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(svfield->cache,hyb); ++svtot; ajStrDel(&word); } } if(keyfield) { while(ajListPop(keyfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(keyfield->cache, priobj); ++keytot; ajStrDel(&word); } } if(desfield) { while(ajListPop(desfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; if(ajBtreeInsertKeyword(desfield->cache, priobj)) ++destot; ajStrDel(&word); } } if(orgfield) { while(ajListPop(orgfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(orgfield->cache, priobj); ++orgtot; ajStrDel(&word); } } } ajFileClose(&inf); nentries += ientries; nowtime = ajTimeNewToday(); nowclock = ajClockNow(); ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n", nentries, ientries, ajClockDiff(startclock,nowclock), ajTimeDiff(starttime, nowtime), ajClockDiff(beginclock,nowclock), ajTimeDiff(begintime, nowtime)); if(statistics) { if(entry->do_id) ajBtreeCacheStatsOut(outf, entry->idcache, &idcache, &idread, &idwrite, &idsize); if(accfield) ajBtreeCacheStatsOut(outf, accfield->cache, &accache, &acread, &acwrite, &acsize); if(svfield) ajBtreeCacheStatsOut(outf, svfield->cache, &svcache, &svread, 
&svwrite, &svsize); if(keyfield) ajBtreeCacheStatsOut(outf, keyfield->cache, &kwcache, &kwread, &kwwrite, &kwsize); if(desfield) ajBtreeCacheStatsOut(outf, desfield->cache, &decache, &deread, &dewrite, &desize); if(orgfield) ajBtreeCacheStatsOut(outf, orgfield->cache, &txcache, &txread, &txwrite, &txsize); } ajTimeDel(&begintime); ajTimeDel(&nowtime); } embBtreeDumpParameters(entry); embBtreeCloseCaches(entry); nowtime = ajTimeNewToday(); tdiff = ajTimeDiff(starttime, nowtime); days = (ajint) (tdiff/(24.0*3600.0)); tdiff -= (24.0*3600.0)*(double)days; hours = (ajint) (tdiff/3600.0); tdiff -= 3600.0*(double)hours; mins = (ajint) (tdiff/60.0); tdiff -= 60.0 * (double) mins; if(days) ajFmtPrintF(outf, "Total time: %d %02d:%02d:%04.1f\n", days, hours, mins, tdiff); else if (hours) ajFmtPrintF(outf, "Total time: %d:%02d:%04.1f\n", hours, mins, tdiff); else ajFmtPrintF(outf, "Total time: %d:%04.1f\n", mins, tdiff); ajTimeDel(&nowtime); ajTimeDel(&starttime); if(maxidlen) { ajFmtPrintF(outf, "Resource idlen truncated %u IDs. " "Maximum ID length was %u.", idtrunc, maxidlen); ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.", idtrunc, maxidlen); } ajFileClose(&outf); embBtreeEntryDel(&entry); ajStrDel(&tmpstr); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&dbrs); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&dbtype); nfields = 0; while(fieldarray[nfields]) ajStrDel(&fieldarray[nfields++]); AJFREE(fieldarray); ajBtreeIdDel(&idobj); ajBtreePriDel(&priobj); ajBtreeHybDel(&hyb); ajRegFree(&dbxflat_wrdexp); embExit(); return 0; }
int main(int argc, char **argv) { EmbPBtreeEntry entry = NULL; AjPStr dbname = NULL; AjPStr dbrs = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr dbtype = NULL; AjPFile outf = NULL; AjPStr *fieldarray = NULL; ajint nfields; ajint nfiles; AjPStr tmpstr = NULL; AjPStr thysfile = NULL; ajint i; AjPFile inf = NULL; AjPStr word = NULL; AjPBtId idobj = NULL; AjPBtPri priobj = NULL; AjPBtHybrid hyb = NULL; ajulong nentries = 0L; ajulong ientries = 0L; AjPTime starttime = NULL; AjPTime begintime = NULL; AjPTime nowtime = NULL; ajlong startclock = 0; ajlong beginclock = 0; ajlong nowclock = 0; embInit("dbxflat", argc, argv); dbtype = ajAcdGetListSingle("idformat"); fieldarray = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); outf = ajAcdGetOutfile("outfile"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); dbrs = ajAcdGetString("dbresource"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); entry = embBtreeEntryNew(); tmpstr = ajStrNew(); idobj = ajBtreeIdNew(); priobj = ajBtreePriNew(); hyb = ajBtreeHybNew(); nfields = embBtreeSetFields(entry,fieldarray); embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory, indexdir); embBtreeGetRsInfo(entry); nfiles = embBtreeGetFiles(entry,directory,filename,exclude); embBtreeWriteEntryFile(entry); embBtreeOpenCaches(entry); starttime = ajTimeNewToday(); ajFmtPrintF(outf, "Processing directory: %S\n", directory); for(i=0;i<nfiles;++i) { begintime = ajTimeNewToday(); beginclock = ajClockNow(); ajListPop(entry->files,(void **)&thysfile); ajListPushAppend(entry->files,(void *)thysfile); ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile); if(!(inf=ajFileNewInNameS(tmpstr))) ajFatal("Cannot open input file %S\n",tmpstr); ajFilenameTrimPath(&tmpstr); ajFmtPrintF(outf,"Processing file: 
%S",tmpstr); ientries = 0L; while(dbxflat_NextEntry(entry,inf)) { ++ientries; if(entry->do_id) { if(ajStrGetLen(entry->id) > entry->idlen) { if(ajStrGetLen(entry->id) > maxidlen) { ajWarn("id '%S' too long, truncating to idlen %d", entry->id, entry->idlen); maxidlen = ajStrGetLen(entry->id); } idtrunc++; ajStrKeepRange(&entry->id,0,entry->idlen-1); } ajStrFmtLower(&entry->id); ajStrAssignS(&hyb->key1,entry->id); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->idcache,hyb); } if(entry->do_accession) { while(ajListPop(entry->ac,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->accache,hyb); ajStrDel(&word); } } if(entry->do_sv) { while(ajListPop(entry->sv,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->svcache,hyb); ajStrDel(&word); } } if(entry->do_keyword) { while(ajListPop(entry->kw,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(entry->kwcache, priobj); ajStrDel(&word); } } if(entry->do_description) { while(ajListPop(entry->de,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(entry->decache, priobj); ajStrDel(&word); } } if(entry->do_taxonomy) { while(ajListPop(entry->tx,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(entry->txcache, priobj); ajStrDel(&word); } } } ajFileClose(&inf); nentries += ientries; nowtime = ajTimeNewToday(); nowclock = ajClockNow(); ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n", nentries, ientries, ajClockDiff(startclock,nowclock), 
ajTimeDiff(starttime, nowtime), ajClockDiff(beginclock,nowclock), ajTimeDiff(begintime, nowtime)); ajTimeDel(&begintime); ajTimeDel(&nowtime); } embBtreeDumpParameters(entry); embBtreeCloseCaches(entry); nowtime = ajTimeNewToday(); ajFmtPrintF(outf, "Total time: %.1fs\n", ajTimeDiff(starttime, nowtime)); ajTimeDel(&nowtime); ajTimeDel(&starttime); if(maxidlen) { ajFmtPrintF(outf, "Resource idlen truncated %u IDs. " "Maximum ID length was %u.", idtrunc, maxidlen); ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.", idtrunc, maxidlen); } ajFileClose(&outf); embBtreeEntryDel(&entry); ajStrDel(&tmpstr); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&dbrs); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&dbtype); nfields = 0; while(fieldarray[nfields]) ajStrDel(&fieldarray[nfields++]); AJFREE(fieldarray); ajBtreeIdDel(&idobj); ajBtreePriDel(&priobj); ajBtreeHybDel(&hyb); ajRegFree(&dbxflat_wrdexp); embExit(); return 0; }
/* main (acdrelations)
**
** Reads the EDAM data file and the known-types file, then processes each
** ACD file in the input directory list: the file is opened, its path is
** trimmed to the bare name, an output file of that name is opened in the
** output directory, and acdrelations_procacdfile is run on the pair.
**
** Calls ajFatal if an input or output ACD file cannot be opened.
*/
int main(ajint argc, char **argv)
{
    /* Variable declarations */
    AjPFile   inf_edam   = NULL;   /* Name of EDAM data (input) file   */
    AjPFile   acdoutf    = NULL;   /* Name of ACD (output) file        */

    AjPList   acdinlist  = NULL;   /* List of ACD file names (input)   */
    AjPFile   acdinf     = NULL;   /* Name of ACD (input) file         */
    AjPStr    acdname    = NULL;   /* Name of current acd file         */
    AjPDirout acdoutdir  = NULL;   /* Directory for ACD files (output) */

    AjPFile   inf_ktype  = NULL;   /* Name of knowntypes.standard file */

    PEdam     edam       = NULL;   /* EDAM relations data              */
    PKtype    ktype      = NULL;   /* Data from knowntype.standard     */

    /* Read data from acd. */
    embInitP("acdrelations",argc,argv,"MYEMBOSS");

    /* ACD data handling */
    inf_edam  = ajAcdGetDatafile("infileedam");
    inf_ktype = ajAcdGetInfile("infiletype");
    acdinlist = ajAcdGetDirlist("indir");
    acdoutdir = ajAcdGetOutdir("outdir");

    /* Read data file */
    edam  = ajEdamNew();
    ktype = ajKtypeNew();

    acdrelations_readdatfile(inf_edam, &edam);
    acdrelations_readtypefile(inf_ktype, &ktype);

    /* Main application loop. Process each ACD file in turn. */
    while(ajListPop(acdinlist,(void **)&acdname))
    {
        if(!(acdinf = ajFileNewInNameS(acdname)))
            ajFatal("Cannot open input ACD file %S\n", acdname);

        ajFilenameTrimPath(&acdname);

        if(!(acdoutf = ajFileNewOutNameDirS(acdname, acdoutdir)))
            ajFatal("Cannot open output ACD file %S\n", acdname);

        acdrelations_procacdfile(acdinf, acdoutf, edam, ktype);

        ajFileClose(&acdinf);
        ajFileClose(&acdoutf);

        /* A popped name is no longer held by the list; release it here */
        ajStrDel(&acdname);
    }

    /* Clean up and exit */
    ajFileClose(&inf_edam);
    ajFileClose(&inf_ktype);
    ajListFree(&acdinlist);
    ajDiroutDel(&acdoutdir);

    ajEdamDel(&edam);
    /* NOTE(review): ktype is never released; no matching destructor is
    ** visible in this chunk - confirm and free it if one exists. */

    ajExit();

    return 0;
}
/*
** pdbplus entry point.
**
** For every clean coordinate file (CCF) in the input list the matching
** PDB file name is built from the PDB directory, prefix, lower-cased PDB
** code and extension.  Unless the selected mode starts with '2', STRIDE
** is run on the PDB file and its "ASG" lines are copied into the matching
** residues.  Unless the mode starts with '1', NACCESS is run and its "RES"
** lines are copied into the matching residues.  The updated Pdb object is
** then written to the CCF output directory.
**
** Failures for individual files are reported with ajWarn and in the log
** file, and processing continues with the next file.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char **] Command-line arguments
** @return [int] 0 (only reached if ajExit returns)
*/
int main(ajint argc, char **argv)
{
    AjPList    ccfin        = NULL;  /* List of CCF (input) files.            */
    AjPDir     pdbin        = NULL;  /* Path of pdb input files.              */
    AjPStr     pdbprefix    = NULL;  /* Prefix of pdb input files.            */
    AjPStr     pdb_name     = NULL;  /* Full name (path/name/extension) of
                                        pdb format input file.                */
    AjPDirout  ccfout       = NULL;  /* Path of coordinate output file.       */
    AjPStr     randomname   = NULL;  /* Name for temp file tempf.             */
    AjPStr     ccf_this     = NULL;  /* Name of the CCF file being processed. */
    AjPStr     exec         = NULL;  /* "rm" command for temporary files.     */
    AjPStr     naccess_str  = NULL;  /* Name of a NACCESS output file.        */
    AjPStr     line         = NULL;  /* Line read from STRIDE/NACCESS output. */
    AjPStr     syscmd       = NULL;  /* Command line passed to system().      */
    AjPStr    *mode         = NULL;  /* Mode of operation from acd.           */

    AjPFile    errf         = NULL;  /* pdbplus error file pointer.           */
    AjPFile    serrf        = NULL;  /* stride error file pointer.            */
    AjPFile    nerrf        = NULL;  /* naccess error file pointer.           */
    AjPFile    tempf        = NULL;  /* Temp file holding STRIDE or NACCESS
                                        output.                               */
    AjPFile    ccf_inf      = NULL;  /* Protein coordinate input file.        */
    AjPFile    ccf_outf     = NULL;  /* Protein coordinate output file.       */

    AjIList    iter         = NULL;

    AjBool     done_naccess = ajFalse;
    AjBool     done_stride  = ajFalse;
    AjBool     found        = ajFalse;
    AjPResidue temp_res     = NULL;  /* Pointer to Residue object.            */
    AjPPdb     pdb_old      = NULL;  /* Pointer to PDB object - without new
                                        stride elements.                      */
    AjPPdb     pdb          = NULL;  /* Pointer to PDB object.                */
    ajint      idn          = 0;     /* Chain identifier as a number (1,2,...)*/
    ajint      chain_num    = 0;     /* Chain identifier index (0,1,...).     */
    ajint      tS           = 0;     /* User-defined threshold size for SSEs. */
    ajint      nostride     = 0;     /* No. times stride failed               */
    ajint      nonaccess    = 0;     /* No. times naccess failed              */
    ajint      nofile       = 0;     /* No. times of file error               */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr     res          = NULL;  /* Residue id from STRIDE ASG line
                                        (ALA etc).                            */
    AjPStr     res_num      = NULL;  /* PDB residue number from STRIDE ASG
                                        line.                                 */
    char       pcid         = ' ';   /* Protein chain identifier from STRIDE
                                        or NACCESS output (A,B, etc).         */
    char       ss           = ' ';   /* One-letter secondary structure code
                                        from STRIDE ASG line.                 */
    float      ph           = 0.0;   /* Phi angle from STRIDE ASG line.       */
    float      ps           = 0.0;   /* Psi angle from STRIDE ASG line.       */
    float      sa           = 0.0;   /* Residue solvent accessible area from
                                        STRIDE ASG line.                      */

    /* The ten numeric columns of a NACCESS RES line. */
    float      f1           = 0;
    float      f2           = 0;
    float      f3           = 0;
    float      f4           = 0;
    float      f5           = 0;
    float      f6           = 0;
    float      f7           = 0;
    float      f8           = 0;
    float      f9           = 0;
    float      f10          = 0;

    /* Allocate strings; this section is used for variables that are
       allocated once only. */
    pdb_name    = ajStrNew();
    res         = ajStrNew();
    res_num     = ajStrNew();
    randomname  = ajStrNew();
    syscmd      = ajStrNew();
    line        = ajStrNew();
    naccess_str = ajStrNew();
    exec        = ajStrNew();

    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin     = ajAcdGetDirlist("ccfinpath");
    pdbin     = ajAcdGetDirectory("pdbindir");
    pdbprefix = ajAcdGetString("pdbprefix");
    ccfout    = ajAcdGetOutdir("ccfoutdir");
    mode      = ajAcdGetList("mode");
    errf      = ajAcdGetOutfile("logfile");

    /* The STRIDE log is only requested when STRIDE will be run, and the
       NACCESS log only when NACCESS will be run. */
    if(ajStrGetCharFirst(*mode) != '2')
        serrf = ajAcdGetOutfile("slogfile");

    if(ajStrGetCharFirst(*mode) != '1')
        nerrf = ajAcdGetOutfile("nlogfile");

    tS = ajAcdGetInt("thresholdsize");

    ajRandomSeed();
    ajFilenameSetTempname(&randomname);

    /*
    ** Start of main application loop.
    ** Process each PDB/ protein coordinate file (EMBL format) in turn.
    */
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file.  If it cannot be opened, write a
           message to the error file, delete ccf_this and continue. */
        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)
        {
            ajWarn("%s%S\n//\n",
                   "clean coordinate file not found: ", ccf_this);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "clean coordinate file not found: ", ccf_this);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFmtPrint("Processing %S\n", ccf_this);
        fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into
           AjPPdb object.  ajPdbReadAllModelsNew will create the AjPPdb
           object. */
        if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
            /* The two literals are concatenated, so the first one needs a
               trailing space ("read error", not "readerror"). */
            ajWarn("ERROR Clean coordinate file read "
                   "error: %S\n//\n", ccf_this);
            ajFmtPrintF(errf, "ERROR Clean coordinate file read "
                        "error: %S\n//\n", ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old);
        ajPdbDel(&pdb_old);

        /* Construct name of corresponding PDB file.
           NACCESS does *not* generate an output file if the path is './'
           e.g. naccess ./1rbp.ent , therefore replace './' with null. */
        ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));

        if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
            ajStrAssignC(&pdb_name, "");

        ajStrAppendS(&pdb_name, pdbprefix);
        ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
        ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));

        /* Check corresponding PDB file exists for reading. */
        if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this);
            ajPdbDel(&pdb);
            nofile++;
            continue;
        }

        if(ajStrGetCharFirst(*mode) != '2')
        {
            /*
            ** Create a string containing the STRIDE command line (it needs
            ** PDB file name & name of temp output file).
            ** Call STRIDE through the shell.
            ** NOTE(review): the command is assembled from file names and
            ** passed to system() unquoted - names containing shell
            ** metacharacters would be interpreted by the shell.
            */
            ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",
                        ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
            ajFmtPrint("%S %S -f%S >> %s 2>&1\n",
                       ajAcdGetpathC("stride"),
                       pdb_name, randomname,ajFileGetNameC(serrf));
            system(ajStrGetPtr(syscmd));

            /* Open the stride output file */
            if (((tempf = ajFileNewInNameS(randomname)) == NULL))
            {
                ajWarn("%s%S\n//\n", "no stride output for: ", pdb_name);
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no stride output for: ", pdb_name);
                nostride++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "stride output for: ", pdb_name);

            done_stride = ajFalse;

            /* Parse STRIDE output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"ASG"))
                {
                    ajFmtScanS(line, "%*S %S %c %S %*d %c %*S %f %f %f %*S",
                               &res, &pcid, &res_num, &ss, &ph, &ps, &sa);

                    /*
                    ** Populate pdbplus object with the data from this
                    ** parsed line.  This means first identifying the
                    ** chain, then finding the residue.
                    */

                    /* A '-' chain identifier is changed to '.' before the
                       chain lookup. */
                    if (pcid == '-')
                        pcid = '.';

                    /* Get chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id %c "
                                    "to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** The chain number that will get written starts at 1,
                    ** but we want an index into an array which must start
                    ** at 0, so subtract 1 from the chain number to get the
                    ** index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; found switches to true when the first
                    ** residue corresponding to the line is found.
                    */
                    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            done_stride = ajTrue;
                            found = ajTrue;
                            temp_res->eStrideType = ss;
                            temp_res->Phi  = ph;
                            temp_res->Psi  = ps;
                            temp_res->Area = sa;
                        }
                        /* If the matching residue has been processed move
                           on to next ASG line, next residue. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residue not found yet. */
                            continue;
                    }

                    ajListIterDel(&iter);
                } /* End of if ASG loop. */
            } /* End of while line loop. */

            if(done_stride)
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "stride data for: ", pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no stride data for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
                nostride++;
            }

            /* Close STRIDE temp file. & tidy up. */
            ajFileClose(&tempf);

            /* Remove temporary file (stride output file). */
            ajFmtPrintS(&exec, "rm %S", randomname);
            ajSysSystem(exec);

            /*
            ** Calculate element serial numbers (eStrideNum) & amend
            ** residue objects, count no's of elements and amend chain
            ** object (numHelices, num Strands).
            */
            pdbplus_sort(pdb, tS);
        }

        if(ajStrGetCharFirst(*mode) != '1')
        {
            /*
            ** Create a string containing the NACCESS command line (it
            ** needs PDB file name & name of temp output file) & call
            ** NACCESS.
            ** If e.g. /data/structure/pdbfred.ent was parsed and the
            ** program was run from /stuff, then /stuff/fred.asa and
            ** /stuff/fred.rsa would be written.  These must be deleted
            ** once parsed (only use the .rsa file here).
            */
            ajFmtPrintS(&syscmd, "%S %S >> %s 2>&1",
                        ajAcdGetpathC("naccess"), pdb_name,
                        ajFileGetNameC(nerrf));
            ajFmtPrint("%S %S >> %s 2>&1\n",
                       ajAcdGetpathC("naccess"), pdb_name,
                       ajFileGetNameC(nerrf));
            system(ajStrGetPtr(syscmd));

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".rsa");

            /* Open the NACCESS output file. */
            if (((tempf = ajFileNewInNameS(naccess_str)) == NULL))
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess output for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
                nonaccess++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "naccess output for: ", pdb_name);

            done_naccess = ajFalse;

            /* Parse NACCESS output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"RES"))
                {
                    /* The chain identifier is read from the ninth
                       character.  A shorter line cannot hold one, so skip
                       it rather than read past the end of the text. */
                    if(ajStrGetLen(line) < 9)
                        continue;

                    pcid = ajStrGetCharPos(line, 8);

                    /* Read data from lines.  A blank chain column means
                       there is no chain token to step over. */
                    if(pcid == ' ')
                        ajFmtScanS(line, "%*S %S %S %f %f %f "
                                   "%f %f %f %f %f %f %f",
                                   &res, &res_num, &f1, &f2, &f3, &f4, &f5,
                                   &f6, &f7, &f8, &f9, &f10);
                    else
                        ajFmtScanS(line, "%*S %S %*c %S %f %f "
                                   "%f %f %f %f %f %f %f %f",
                                   &res, &res_num, &f1, &f2, &f3, &f4, &f5,
                                   &f6, &f7, &f8, &f9, &f10);

                    /* Identify the chain, then find all the residues
                       corresponding to the line. */

                    /* Get the chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id"
                                    " %c to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** Chain number will start at 1, but we want an index
                    ** into an array which must start at 0, so subtract 1
                    ** from the chain number to get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; temp_res points to the current residue.
                    ** found switches to true when the first residue
                    ** corresponding to the line is found.
                    */
                    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want, write the
                           residue object. */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            found = ajTrue;
                            done_naccess = ajTrue;
                            temp_res->all_abs  = f1;
                            temp_res->all_rel  = f2;
                            temp_res->side_abs = f3;
                            temp_res->side_rel = f4;
                            temp_res->main_abs = f5;
                            temp_res->main_rel = f6;
                            temp_res->npol_abs = f7;
                            temp_res->npol_rel = f8;
                            temp_res->pol_abs  = f9;
                            temp_res->pol_rel  = f10;
                        }
                        /* If the matching residues have all been
                           processed, move on to the next RES line. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residues not found yet, move on to
                               next residue. */
                            continue;
                    }

                    ajListIterDel(&iter);
                }
            }

            if(done_naccess)
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "naccess data for: ", pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess data for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
                nonaccess++;
            }

            /* Remove temporary files (naccess output files). */
            ajFileClose(&tempf);

            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".asa");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".log");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);
        }

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);

        /* Write AjPPdb object to the output file in clean format.  A
           failed open is reported the same way as a failed write instead
           of handing a NULL file to the writer. */
        if(!ccf_outf || !ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("%s%S\n//\n","Could not write results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf,"%s%S\n//\n","Could not write results file for ",
                        pdb->Pdb);
        }

        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */

    ajFmtPrint("STRIDE failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE failures: %d\nNACCESS failures: %d\n",
                nostride, nonaccess);

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);

    ajFileClose(&errf);

    if(ajStrGetCharFirst(*mode) != '2')
        ajFileClose(&serrf);

    if(ajStrGetCharFirst(*mode) != '1')
        ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);

    ajExit();

    return 0;
}
int main(int argc, char **argv) { const char *codons[]= { "TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */ "TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */ "GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */ "AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */ "ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */ "CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */ "TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */ "ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC" /* 56-63 */ }; const char *aa= "***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY"; AjPFile inf = NULL; AjPFile outf = NULL; char *entryname = NULL; AjPStr fname = NULL; AjPStr key = NULL; AjPStr tmpkey = NULL; AjBool allrecords = AJFALSE; AjPTable table = NULL; ajint i = 0; ajint j = 0; ajint k = 0; ajint x = 0; ajint savecount[3]; AjPStr *keyarray = NULL; CutgPValues *valarray = NULL; AjPCod codon = NULL; ajint sum = 0; char c; AjPList flist = NULL; AjPFile logf = NULL; AjPStr entry = NULL; AjPStr baseentry = NULL; AjPStr wild = NULL; AjPStr division = NULL; AjPStr release = NULL; AjPStr wildspecies = NULL; CutgPValues value = NULL; AjPStr docstr = NULL; AjPStr species = NULL; AjPStr filename = NULL; ajint nstops; embInit("cutgextract",argc,argv); tmpkey = ajStrNew(); fname = ajStrNew(); table = ajTablestrNewLen(TABLE_ESTIMATE); flist = ajAcdGetDirlist("directory"); wild = ajAcdGetString("wildspec"); release = ajAcdGetString("release"); logf = ajAcdGetOutfile("outfile"); wildspecies = ajAcdGetString("species"); filename = ajAcdGetString("filename"); allrecords = ajAcdGetBoolean("allrecords"); ajStrInsertC(&release, 0, "CUTG"); ajStrRemoveWhite(&release); while(ajListPop(flist,(void **)&entry)) { ajStrAssignS(&baseentry, entry); ajFilenameTrimPath(&baseentry); ajDebug("Testing file '%S'\n", entry); if(!ajStrMatchWildS(baseentry,wild)) { ajStrDel(&entry); continue; } ajDebug("... 
matched wildcard '%S'\n", wild); inf = ajFileNewInNameS(entry); if(!inf) ajFatal("cannot open file %S",entry); ajFmtPrintS(&division, "%F", inf); ajFilenameTrimAll(&division); while((entryname = cutgextract_next(inf, wildspecies, &species, &docstr))) { if(ajStrGetLen(filename)) ajStrAssignS(&tmpkey,filename); else ajStrAssignC(&tmpkey,entryname); /* See if organism is already in the table */ value = ajTableFetch(table,tmpkey); if(!value) /* Initialise */ { key = ajStrNewS(tmpkey); AJNEW0(value); ajStrAssignS(&value->Species,species); ajStrAssignS(&value->Division, division); ajTablePut(table,(void *)key,(void *)value); } for(k=0;k<3;k++) savecount[k] = value->Count[k]; nstops = cutgextract_readcodons(inf,allrecords, value->Count); if(nstops < 1) { value->Skip++; continue; } value->CdsCount++; if(nstops>1) { value->CdsCount += (nstops - 1); value->Warn++; ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'", nstops, value->Count[0] - savecount[0], value->Count[1] - savecount[1], value->Count[2] - savecount[2], cutgextractSavepid); } } ajStrDel(&entry); ajFileClose(&inf); } ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray); i = 0; while(keyarray[i]) { key = keyarray[i]; value = (CutgPValues) valarray[i++]; codon = ajCodNew(); sum = 0; for(j=0;j<CODONS;++j) { sum += value->Count[j]; x = ajCodIndexC(codons[j]); codon->num[x] = value->Count[j]; c = aa[j]; if(c=='*') codon->aa[x] = 27; else codon->aa[x] = c-'A'; } ajCodCalcUsage(codon,sum); ajStrAppendC(&key, ".cut"); if(allrecords) { if(value->Warn) ajFmtPrintF(logf, "Writing %S CDS: %d Warnings: %d\n", key, value->CdsCount, value->Warn); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } else { if(value->Skip) ajFmtPrintF(logf, "Writing %S CDS: %d Skipped: %d\n", key, value->CdsCount, value->Skip); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } ajFmtPrintS(&fname,"CODONS/%S",key); outf = ajDatafileNewOutNameS(fname); if(!outf) ajFatal("Cannot open 
output file %S",fname); ajCodSetNameS(codon, key); ajCodSetSpeciesS(codon, value->Species); ajCodSetDivisionS(codon, value->Division); ajCodSetReleaseS(codon, release); ajCodSetNumcds(codon, value->CdsCount); ajCodSetNumcodons(codon, sum); ajCodWrite(codon, outf); ajFileClose(&outf); ajStrDel(&key); ajStrDel(&value->Division); ajStrDel(&value->Doc); ajStrDel(&value->Species); AJFREE(value); ajCodDel(&codon); } AJFREE(keyarray); AJFREE(valarray); ajTableFree(&table); ajListFree(&flist); ajStrDel(&wild); ajStrDel(&release); ajStrDel(&wildspecies); ajStrDel(&filename); ajFileClose(&logf); ajStrDel(&cutgextractSavepid); ajStrDel(&cutgextractLine); ajStrDel(&cutgextractOrg); ajStrDel(&fname); ajStrDel(&tmpkey); ajStrDel(&species); ajStrDel(&docstr); ajStrDel(&division); ajStrDel(&baseentry); embExit(); return 0; }
int main(int argc, char **argv) { AjPFile infdat = NULL; AjPFile infdoc = NULL; AjPFile outf = NULL; AjPFile outs = NULL; AjBool haspattern; const char *p; AjPStr line = NULL; AjPStr text = NULL; AjPStr dirname = NULL; AjPStr filename = NULL; AjPStr id = NULL; AjPStr ac = NULL; AjPStr de = NULL; AjPStr pa = NULL; AjPStr ps = NULL; AjPStr fn = NULL; AjPStr re = NULL; AjPStr fname = NULL; AjBool flag; AjBool isopen; AjBool goback; ajlong storepos = 0L; embInit("prosextract", argc, argv); dirname = ajAcdGetDirectoryName("prositedir"); line = ajStrNew(); text = ajStrNew(); id = ajStrNew(); ac = ajStrNew(); de = ajStrNew(); pa = ajStrNew(); ps = ajStrNew(); fn=ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.dat"); if(!(infdat=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fn=ajStrNewC("PROSITE/prosite.lines"); outf = ajDatafileNewOutNameS(fn); ajStrDel(&fn); haspattern = ajFalse; while(ajReadlineTrim(infdat, &line) ) { if(ajStrPrefixC(line, "ID")) { if(ajStrSuffixC(line,"PATTERN.")) { haspattern = ajTrue; /*save id*/ p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t;"); p = ajSysFuncStrtok(NULL," \t;"); ajStrAssignC(&id,p); ajFmtPrintF(outf, "%S ", id); continue; } else { haspattern = ajFalse; continue; } } if(!haspattern) continue; if(ajStrPrefixC(line, "AC") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t;"); p = ajSysFuncStrtok(NULL, " \t;"); ajStrAssignC(&ac,p); ajFmtPrintF(outf, "%S\n ", ac); continue; } if(ajStrPrefixC(line, "DE") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAssignC(&de,p); ajFmtPrintF(outf, "%S\n ", de); continue; } if(ajStrPrefixC(line, "PA")) { ajStrAssignC(&pa,""); while(ajStrPrefixC(line,"PA")) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAppendC(&pa,p); ajReadlineTrim(infdat, &line); } ajFmtPrintF(outf, "%S\n", pa); re = embPatPrositeToRegExp(pa); ajFmtPrintF(outf, "^%S\n\n", re); 
ajStrDel(&re); continue; } } /* Finished processing prosite.dat so look at prosite.doc */ fn = ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.doc"); if(!(infdoc=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fname = ajStrNewC("PROSITE/"); flag = ajFalse; isopen = ajFalse; goback = ajFalse; while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text, "{PS") && isopen && !goback) goback = ajTrue; if(ajStrPrefixC(text, "{PS") && !isopen) { storepos = ajFileResetPos(infdoc); /* save out the documentation text to acc numbered outfiles . */ p = ajStrGetPtr(text)+1; p = ajSysFuncStrtok(p, ";"); ajStrAssignS(&filename, fname); ajStrAppendC(&filename, p); outs = ajDatafileNewOutNameS(filename); flag = ajTrue; isopen = ajTrue; continue; } if(ajStrPrefixC(text, "{BEGIN}") && flag) { while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text,"{END}")) break; ajFmtPrintF(outs, "%S\n", text); } ajFileClose(&outs); isopen = ajFalse; if(goback) { goback = ajFalse; ajFileSeek(infdoc,storepos,0); } } } ajStrDel(&line); ajStrDel(&text); ajStrDel(&dirname); ajStrDel(&filename); ajStrDel(&id); ajStrDel(&ac); ajStrDel(&de); ajStrDel(&pa); ajStrDel(&re); ajStrDel(&ps); ajStrDel(&fname); ajFileClose(&infdat); ajFileClose(&infdoc); ajFileClose(&outf); embExit(); return 0; }
/*
** Copy the "*.pfm" files of a JASPAR directory into the EMBOSS data
** directory.
**
** A file is copied when its base name starts with one of the Jprefix
** prefixes and the character following the prefix is a decimal digit.
** It is written, line by line, to the Directory recorded for that prefix
** beneath the data directory.  The function is fatal if the data
** directory cannot be determined, if the matrix list file is not
** readable in the given directory, or if a file cannot be opened.
**
** @param [r] directory [AjPStr] JASPAR directory; it is concatenated
**                               directly with the file name
** @return [void]
*/
static void jaspextract_copyfiles(AjPStr directory)
{
    const AjPStr datadir = NULL;
    AjPStr listname = ajStrNew();
    AjPStr pattern  = ajStrNewC("*.pfm");
    AjPStr basename = ajStrNew();
    AjPStr buffer   = ajStrNew();
    AjPStr target   = ajStrNew();
    AjPStr source   = NULL;
    AjPList files   = ajListNew();
    AjPFile src     = NULL;
    AjPFile dst     = NULL;
    ajuint plen     = 0;
    ajuint idx      = 0;
    char next       = '\0';

    datadir = ajDatafileValuePath();

    if(!datadir)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");

    ajFmtPrintS(&listname,"%S%s",directory,MATRIXFILE);

    if(!ajFilenameExistsRead(listname))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found",directory);

    ajFilelistAddPathWild(files, directory, pattern);

    while(ajListPop(files,(void **)&source))
    {
        ajStrAssignS(&basename,source);
        ajFilenameTrimPath(&basename);

        /* Find the first prefix that is followed directly by a digit */
        for(idx = 0; Jprefix[idx].Prefix; ++idx)
        {
            if(!ajStrPrefixC(basename,Jprefix[idx].Prefix))
                continue;

            plen = strlen(Jprefix[idx].Prefix);
            next = ajStrGetPtr(basename)[plen];

            if(next >= '0' && next <= '9')
                break;
        }

        /* Only files with a recognised prefix are copied */
        if(Jprefix[idx].Prefix)
        {
            ajFmtPrintS(&target,"%S%s%c%S",datadir,Jprefix[idx].Directory,
                        SLASH_CHAR, basename);

            dst = ajFileNewOutNameS(target);

            if(!dst)
                ajFatal("Cannot open output file %S",target);

            /* Avoid UNIX copy for portability */
            src = ajFileNewInNameS(source);

            if(!src)
                ajFatal("Cannot open input file: %S",source);

            while(ajReadlineTrim(src,&buffer))
                ajFmtPrintF(dst,"%S\n",buffer);

            ajFileClose(&src);
            ajFileClose(&dst);
        }

        ajStrDel(&source);
    }

    ajListFree(&files);

    ajStrDel(&pattern);
    ajStrDel(&target);
    ajStrDel(&buffer);
    ajStrDel(&basename);
    ajStrDel(&listname);

    return;
}
/* @funcstatic domainalign_stamp ********************************************** ** ** Call STAMP and process files. ** ** @param [r] prevdomain [AjPDomain] Previous domain. ** @param [r] domain [AjPDomain] This domain. ** @param [r] daf [AjPDirout] Domain alignment files. ** @param [r] super [AjPDirout] Superimposition files. ** @param [r] singlets [AjPDirout] Singlet files. ** @param [r] align [AjPStr] Align. ** @param [r] alignc [AjPStr] Alignc. ** @param [r] dom [AjPStr] Dom. ** @param [r] name [AjPStr] Name. ** @param [r] set [AjPStr] Name of set file. ** @param [r] scan [AjPStr] Name of scan file. ** @param [r] sort [AjPStr] Name of sort file. ** @param [r] log [AjPStr] Log file name. ** @param [r] out [AjPStr] Out file name. ** @param [r] keepsinglets [AjBool] Keep singlet sequences or not. ** @param [r] moden [ajint] Mode number. ** @param [r] noden [ajint] Node number. ** @param [r] nset [ajint] Number in set. ** @param [r] logf [AjPFile] Lof file. ** ** ** @return [void] True on success ** @@ ****************************************************************************/ static void domainalign_stamp(AjPDomain prevdomain, AjPDomain domain, AjPDirout daf, AjPDirout super, AjPDirout singlets, AjPStr align, AjPStr alignc, AjPStr dom, AjPStr name, AjPStr set, AjPStr scan, AjPStr sort, AjPStr log, AjPStr out, AjBool keepsinglets, ajint moden, ajint noden, ajint nset, AjPFile logf) { AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPFile clusterf = NULL; /* File pointer for log file. */ ajint ncluster = 0; /* Counter for the number of clusters. */ AjPStr line = NULL; /* Holds a line from the log file. */ AjPRegexp rexp = NULL; /* For parsing no. of clusters in log file */ AjPStr temp = NULL; /* A temporary string. */ ajint x = 0; /* Loop counter. */ exec = ajStrNew(); line = ajStrNew(); temp = ajStrNew(); rexp = ajRegCompC("^(Cluster:)"); ajDebug("domainalign_stamp name: '%S'\n", name); /* Call STAMP. 
*/ ajFmtPrintS(&exec, "%S -l %S -s -n 2 -slide 5 -prefix %S -d %S", ajAcdGetpathC("stamp"), dom, name, set); ajFmtPrint("\n%S\n\n", exec); ajSysExecS(exec); ajFmtPrintS(&exec, "%S -f %S -s Sc 2.5", ajAcdGetpathC("sorttrans"), scan); ajFmtPrint("\n%S > %S\n\n", exec, sort); ajSysExecOutnameS(exec, sort); ajFmtPrintS(&exec, "%S -l %S -prefix %S", ajAcdGetpathC("stamp"), sort, name); ajFmtPrint("\n%S > %S\n\n", exec, log); ajSysExecOutnameS(exec, log); ajFmtPrintS(&exec, "%S -f %S -g -o %S", ajAcdGetpathC("transform"), sort, alignc); ajFmtPrint("\n%S\n\n", exec); ajSysExecS(exec); /* Count the number of clusters in the log file. */ if(!(clusterf=ajFileNewInNameS(log))) ajFatal("Could not open log file '%S'\n", log); ncluster=0; while(ajReadlineTrim(clusterf,&line)) if(ajRegExec(rexp,line)) ncluster++; ajFileClose(&clusterf); ajDebug("ncluster: %d\n", ncluster); /* Call STAMP ... calculate two fields for structural equivalence using threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */ ajFmtPrintS(&exec,"%S -f %S.%d -min 0.5", ajAcdGetpathC("poststamp"), name, ncluster); ajFmtPrint("%S\n\n", exec); ajSysExecS(exec); /* Call STAMP ... convert block format alignment into clustal format. */ ajFmtPrintS(&exec,"%S -f %S.%d.post", ajAcdGetpathC("ver2hor"), name, ncluster); ajFmtPrint("%S > %S\n\n", exec, out); ajSysExecOutnameS(exec, out); /* Process STAMP alignment file and generate alignment file for output. */ domainalign_ProcessStampFile(out, align, prevdomain, noden, logf); /* Remove all temporary files. */ for(x=1;x<ncluster+1;x++) { ajFmtPrintS(&temp, "%S.%d", name, x); ajSysFileUnlinkS(temp); } ajFmtPrintS(&temp, "%S.%d.post", name, ncluster); ajSysFileUnlinkS(temp); ajStrDel(&exec); ajStrDel(&line); ajStrDel(&temp); ajRegFree(&rexp); return; }
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** Writes the classification records of the domain (SCOP if the domain
** type is ajSCOP, CATH otherwise) followed by the identifier for the
** requested node level, then copies the alignment blocks from the input
** file, keeping the first seven characters of each identifier and the
** second token of each line.
**
** Fatal if either file cannot be opened or if the node number is not
** valid for the domain type (1-4 for SCOP, 5-9 otherwise).
**
** @param [r] in     [AjPStr]    Name of TCOFFEE input file
** @param [r] align  [AjPStr]    Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden  [ajint]     Node-level of alignment
** @param [r] logf   [AjPFile]   Log file (not used in this function)
**
** @return [void]
** @@
****************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in,
                                           AjPStr align,
                                           AjPDomain domain,
                                           ajint noden,
                                           AjPFile logf)
{
    AjPFile outf  = NULL;   /* Output file pointer.                       */
    AjPFile inf   = NULL;   /* Input file pointer.                        */
    AjPStr  temp1 = NULL;   /* First token of an alignment line.          */
    AjPStr  temp2 = NULL;   /* First seven characters of temp1.           */
    AjPStr  temp3 = NULL;   /* Second token of an alignment line.         */
    AjPStr  line  = NULL;   /* Line of text from input file.              */

    /* Initialise strings. */
    line  = ajStrNew();
    temp1 = ajStrNew();
    temp2 = ajStrNew();
    temp3 = ajStrNew();

    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");

    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");

    /* Write DOMAIN classification records to file */
    if(domain->Type == ajSCOP)
    {
        ajFmtPrintF(outf,"# TY SCOP\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Scop->Class);
        ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF ",75,
                        " \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");
    }
    else
    {
        ajFmtPrintF(outf,"# TY CATH\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Cath->Class);
        ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR ",75,
                        " \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP ",75,
                        " \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF ",75,
                        " \t\n\r");
        ajFmtPrintF(outf,"# XX\n");
    }

    /* Write the identifier that belongs to the requested node level.
       The fatal messages name this function so that a failure is not
       attributed to domainalign_ProcessStampFile. */
    if(domain->Type == ajSCOP)
    {
        if(noden==1)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Class);
        else if(noden==2)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Fold);
        else if(noden==3)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Superfamily);
        else if(noden==4)
            ajFmtPrintF(outf,"# SI %d\n# XX\n", domain->Scop->Sunid_Family);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }
    else
    {
        if(noden==5)
            ajFmtPrintF(outf,"# SI %d\n# XX\n", domain->Cath->Class_Id);
        else if(noden==6)
            ajFmtPrintF(outf,"# SI %d\n# XX\n", domain->Cath->Arch_Id);
        else if(noden==7)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Topology_Id);
        else if(noden==8)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Superfamily_Id);
        else if(noden==9)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Family_Id);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }

    /* Start of code for reading input file. */

    /* Skip everything up to and including the first line that has fewer
       than two characters (ajReadlineTrim has removed the newline). */
    while((ajReadlineTrim(inf,&line)))
        if((ajStrGetCharPos(line, 1)=='\0'))
            break;

    /* Read rest of input file. */
    while((ajReadlineTrim(inf,&line)))
    {
        /* Lines with fewer than two characters are dropped. */
        if((ajStrGetCharPos(line, 1)=='\0'))
            continue;
        /* So is any line starting with "CLUSTAL". */
        else if(ajStrPrefixC(line,"CLUSTAL"))
            continue;
        /* A line starting with a space becomes an empty output line. */
        else if(ajStrPrefixC(line," "))
            ajFmtPrintF(outf,"\n");
        /* Anything else is an identifier followed by sequence. */
        else
        {
            /* Read only the 7 characters of the domain identifier code
               in; the sequence is taken as the second token. */
            ajFmtScanS(line, "%S %S", &temp1,&temp3);
            ajStrAssignSubS(&temp2, temp1, 0, 6);

            /* Write domain id code and sequence out. */
            ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);
        }
    }

    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);

    /* All done. */
    return;
}
int main(int argc, char **argv) { AjPSeqall seqall = NULL; AjPFile dend_outfile = NULL; AjPStr tmp_dendfilename = NULL; AjPFile tmp_dendfile = NULL; AjPStr tmp_aln_outfile = NULL; AjPSeqset seqset = NULL; AjPSeqout seqout = NULL; AjPSeqin seqin = NULL; AjBool only_dend; AjBool are_prot = ajFalse; AjBool do_slow; AjBool use_dend; AjPFile dend_file = NULL; AjPStr dend_filename = NULL; ajint ktup; ajint gapw; ajint topdiags; ajint window; AjBool nopercent; AjPStr pw_matrix = NULL; AjPStr pw_dna_matrix = NULL; AjPFile pairwise_matrix = NULL; float pw_gapc; float pw_gapv; AjPStr pwmstr = NULL; char pwmc = '\0'; AjPStr pwdstr = NULL; char pwdc = '\0'; AjPStr m1str = NULL; AjPStr m2str = NULL; char m1c = '\0'; char m2c = '\0'; AjPStr matrix = NULL; AjPStr dna_matrix = NULL; AjPFile ma_matrix = NULL; float gapc; float gapv; AjBool endgaps; AjBool norgap; AjBool nohgap; ajint gap_dist; ajint maxdiv; AjPStr hgapres = NULL; AjPSeqout fil_file = NULL; AjPSeq seq = NULL; AjPStr cmd = NULL; AjPStr tmp = NULL; AjPStr tmpFilename; AjPStr line = NULL; ajint nb = 0; /* get all the parameters */ embInit("emma", argc, argv); pwmstr = ajStrNew(); pwdstr = ajStrNew(); m1str = ajStrNew(); m2str = ajStrNew(); seqall = ajAcdGetSeqall("sequence"); seqout = ajAcdGetSeqoutset("outseq"); dend_outfile = ajAcdGetOutfile("dendoutfile"); only_dend = ajAcdGetToggle("onlydend"); use_dend = ajAcdGetToggle("dendreuse"); dend_file = ajAcdGetInfile("dendfile"); if (dend_file) ajStrAssignS(&dend_filename, ajFileGetPrintnameS(dend_file)); ajFileClose(&dend_file); do_slow = ajAcdGetToggle("slowalign"); ktup = ajAcdGetInt("ktup"); gapw = ajAcdGetInt("gapw"); topdiags = ajAcdGetInt("topdiags"); window = ajAcdGetInt("window"); nopercent = ajAcdGetBoolean("nopercent"); pw_matrix = ajAcdGetListSingle("pwmatrix"); pwmc = ajStrGetCharFirst(pw_matrix); if(pwmc=='b') ajStrAssignC(&pwmstr,"blosum"); else if(pwmc=='p') ajStrAssignC(&pwmstr,"pam"); else if(pwmc=='g') ajStrAssignC(&pwmstr,"gonnet"); else if(pwmc=='i') 
ajStrAssignC(&pwmstr,"id"); else if(pwmc=='o') ajStrAssignC(&pwmstr,"own"); pw_dna_matrix = ajAcdGetListSingle("pwdnamatrix"); pwdc = ajStrGetCharFirst(pw_dna_matrix); if(pwdc=='i') ajStrAssignC(&pwdstr,"iub"); else if(pwdc=='c') ajStrAssignC(&pwdstr,"clustalw"); else if(pwdc=='o') ajStrAssignC(&pwdstr,"own"); pairwise_matrix = ajAcdGetInfile("pairwisedatafile"); pw_gapc = ajAcdGetFloat( "pwgapopen"); pw_gapv = ajAcdGetFloat( "pwgapextend"); matrix = ajAcdGetListSingle( "matrix"); m1c = ajStrGetCharFirst(matrix); if(m1c=='b') ajStrAssignC(&m1str,"blosum"); else if(m1c=='p') ajStrAssignC(&m1str,"pam"); else if(m1c=='g') ajStrAssignC(&m1str,"gonnet"); else if(m1c=='i') ajStrAssignC(&m1str,"id"); else if(m1c=='o') ajStrAssignC(&m1str,"own"); dna_matrix = ajAcdGetListSingle( "dnamatrix"); m2c = ajStrGetCharFirst(dna_matrix); if(m2c=='i') ajStrAssignC(&m2str,"iub"); else if(m2c=='c') ajStrAssignC(&m2str,"clustalw"); else if(m2c=='o') ajStrAssignC(&m2str,"own"); ma_matrix = ajAcdGetInfile("mamatrixfile"); gapc = ajAcdGetFloat("gapopen"); gapv = ajAcdGetFloat("gapextend"); endgaps = ajAcdGetBoolean("endgaps"); norgap = ajAcdGetBoolean("norgap"); nohgap = ajAcdGetBoolean("nohgap"); gap_dist = ajAcdGetInt("gapdist"); hgapres = ajAcdGetString("hgapres"); maxdiv = ajAcdGetInt("maxdiv"); tmp = ajStrNewC("fasta"); /* ** Start by writing sequences into a unique temporary file ** get file pointer to unique file */ fil_file = ajSeqoutNew(); tmpFilename = emma_getUniqueFileName(); if(!ajSeqoutOpenFilename( fil_file, tmpFilename)) embExitBad(); /* Set output format to fasta */ ajSeqoutSetFormatS( fil_file, tmp); while(ajSeqallNext(seqall, &seq)) { /* ** Check sequences are all of the same type ** Still to be done ** Write out sequences */ if (!nb) are_prot = ajSeqIsProt(seq); ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); if(nb < 2) ajFatal("Multiple alignments need at least two sequences"); /* Generate clustalw command line */ cmd = 
ajStrNewS(ajAcdGetpathC("clustalw")); /* add tmp file containing sequences */ ajStrAppendC(&cmd, " -infile="); ajStrAppendS(&cmd, tmpFilename); /* add out file name */ tmp_aln_outfile = emma_getUniqueFileName(); ajStrAppendC(&cmd, " -outfile="); ajStrAppendS(&cmd, tmp_aln_outfile); /* calculating just the nj tree or doing full alignment */ if(only_dend) ajStrAppendC(&cmd, " -tree"); else if(!use_dend) ajStrAppendC(&cmd, " -align"); /* Set sequence type from information from acd file */ if(are_prot) ajStrAppendC(&cmd, " -type=protein"); else ajStrAppendC(&cmd, " -type=dna"); /* ** set output to MSF format - will read in this file later and output ** user requested format */ ajStrAppendC(&cmd, " -output="); ajStrAppendC(&cmd, "gcg"); /* If going to do pairwise alignment */ if(!use_dend) { /* add fast pairwise alignments*/ if(!do_slow) { ajStrAppendC(&cmd, " -quicktree"); ajStrAppendC(&cmd, " -ktuple="); ajStrFromInt(&tmp, ktup); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -window="); ajStrFromInt(&tmp, window); ajStrAppendS(&cmd, tmp); if(nopercent) ajStrAppendC(&cmd, " -score=percent"); else ajStrAppendC(&cmd, " -score=absolute"); ajStrAppendC(&cmd, " -topdiags="); ajStrFromInt(&tmp, topdiags); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -pairgap="); ajStrFromInt(&tmp, gapw); ajStrAppendS(&cmd, tmp); } else { if(pairwise_matrix) { if(are_prot) ajStrAppendC(&cmd, " -pwmatrix="); else ajStrAppendC(&cmd, " -pwdnamatrix="); ajStrAppendS(&cmd, ajFileGetPrintnameS(pairwise_matrix)); } else { if(are_prot) { ajStrAppendC(&cmd, " -pwmatrix="); ajStrAppendS(&cmd, pwmstr); } else { ajStrAppendC(&cmd, " -pwdnamatrix="); ajStrAppendS(&cmd, pwdstr); } } ajStrAppendC(&cmd, " -pwgapopen="); ajStrFromFloat(&tmp, pw_gapc, 3); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -pwgapext="); ajStrFromFloat(&tmp, pw_gapv, 3); ajStrAppendS(&cmd, tmp); } } /* Multiple alignments */ /* using existing tree or generating new tree? 
*/ if(use_dend) { ajStrAppendC(&cmd, " -usetree="); ajStrAppendS(&cmd, dend_filename); } else { /* use tmp file to hold dend file, will read back in later */ tmp_dendfilename = emma_getUniqueFileName(); ajStrAppendC(&cmd, " -newtree="); ajStrAppendS(&cmd, tmp_dendfilename); } if(ma_matrix) { if(are_prot) ajStrAppendC(&cmd, " -matrix="); else ajStrAppendC(&cmd, " -pwmatrix="); ajStrAppendS(&cmd, ajFileGetPrintnameS(ma_matrix)); } else { if(are_prot) { ajStrAppendC(&cmd, " -matrix="); ajStrAppendS(&cmd, m1str); } else { ajStrAppendC(&cmd, " -dnamatrix="); ajStrAppendS(&cmd, m2str); } } ajStrAppendC(&cmd, " -gapopen="); ajStrFromFloat(&tmp, gapc, 3); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -gapext="); ajStrFromFloat(&tmp, gapv, 3); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -gapdist="); ajStrFromInt(&tmp, gap_dist); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -hgapresidues="); ajStrAppendS(&cmd, hgapres); if(!endgaps) ajStrAppendC(&cmd, " -endgaps"); if(norgap) ajStrAppendC(&cmd, " -nopgap"); if(nohgap) ajStrAppendC(&cmd, " -nohgap"); ajStrAppendC(&cmd, " -maxdiv="); ajStrFromInt(&tmp, maxdiv); ajStrAppendS(&cmd, tmp); /* run clustalw */ /* ajFmtError("..%s..\n\n", ajStrGetPtr( cmd)); */ ajDebug("Executing '%S'\n", cmd); ajSysExecS(cmd); /* produce alignment file only if one was produced */ if(!only_dend) { /* read in tmp alignment output file to output through EMBOSS output */ seqin = ajSeqinNew(); /* ** add the Usa format to the start of the filename to tell EMBOSS ** format of file */ ajStrInsertC(&tmp_aln_outfile, 0, "msf::"); ajSeqinUsa(&seqin, tmp_aln_outfile); seqset = ajSeqsetNew(); if(ajSeqsetRead(seqset, seqin)) { ajSeqoutWriteSet(seqout, seqset); ajSeqoutClose(seqout); ajSeqinDel(&seqin); /* remove the Usa from the start of the string */ ajStrCutStart(&tmp_aln_outfile, 5); } else ajFmtError("Problem writing out EMBOSS alignment file\n"); } /* read in new tmp dend file (if produced) to output through EMBOSS */ if(tmp_dendfilename!=NULL) { 
tmp_dendfile = ajFileNewInNameS( tmp_dendfilename); if(tmp_dendfile!=NULL){ while(ajReadlineTrim(tmp_dendfile, &line)) ajFmtPrintF(dend_outfile, "%s\n", ajStrGetPtr( line)); ajFileClose(&tmp_dendfile); ajSysFileUnlinkS(tmp_dendfilename); } } ajSysFileUnlinkS(tmpFilename); if(!only_dend) ajSysFileUnlinkS(tmp_aln_outfile); ajStrDel(&pw_matrix); ajStrDel(&matrix); ajStrDel(&pw_dna_matrix); ajStrDel(&dna_matrix); ajStrDel(&tmp_dendfilename); ajStrDel(&dend_filename); ajStrDel(&tmp_aln_outfile); ajStrDel(&pwmstr); ajStrDel(&pwdstr); ajStrDel(&m1str); ajStrDel(&m2str); ajStrDel(&hgapres); ajStrDel(&cmd); ajStrDel(&tmp); ajStrDel(&tmpFilename); ajStrDel(&line); ajFileClose(&dend_outfile); ajFileClose(&tmp_dendfile); ajFileClose(&dend_file); ajFileClose(&pairwise_matrix); ajFileClose(&ma_matrix); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&seq); ajSeqoutDel(&seqout); ajSeqoutDel(&fil_file); ajSeqinDel(&seqin); embExit(); return 0; }
/* @funcstatic domainalign_ProcessStampFile ***********************************
**
** Converts a STAMP alignment file into an annotated alignment file.
**
** The input is skipped up to the first line whose second character is the
** string terminator (i.e. a line of at most one character). If such a line
** is found, the output file is created, the SCOP or CATH classification
** records of the domain and the identifier of the requested node are
** written, and the rest of the input is processed in repeating groups of
** three blocks separated by such short lines: block 1 (numbering line and
** sequences) is reformatted, block 2 is dropped, and from block 3 only the
** lines starting with "Post" are kept. If no such line is found a warning
** is issued and no output file is created.
**
** @param [r] in     [AjPStr]    Name of the STAMP file to read
** @param [r] out    [AjPStr]    Name of the alignment file to write
** @param [r] domain [AjPDomain] Domain whose classification is written
** @param [r] noden  [ajint]     Node selector: 1-4 pick a SCOP identifier,
**                               5-9 pick a CATH identifier
** @param [u] logf   [AjPFile]   Log file
** @return [void]
******************************************************************************/
static void domainalign_ProcessStampFile(AjPStr in, AjPStr out, AjPDomain domain, ajint noden, AjPFile logf)
{
    AjPFile inf     = NULL;     /* STAMP input file                      */
    AjPFile outf    = NULL;     /* Alignment output file                 */
    AjPStr  line    = NULL;     /* Current input line                    */
    AjPStr  word    = NULL;     /* First token of a sequence line        */
    AjPStr  domid   = NULL;     /* First 7 characters of that token      */
    AjPStr  text    = NULL;     /* Columns 13-69 of the current line     */
    ajint   blk     = 1;        /* Current block number, cycles 1,2,3    */
    AjBool  ok      = ajFalse;  /* A short separator line was found      */
    ajint   nodeid  = 0;        /* Identifier written in the SI record   */
    AjBool  nodeok  = ajTrue;   /* noden is valid for the domain type    */

    line  = ajStrNew();
    word  = ajStrNew();
    domid = ajStrNew();
    text  = ajStrNew();

    inf = ajFileNewInNameS(in);

    if(!inf)
        ajFatal("Could not open input file in domainalign_ProcessStampFile");

    /* Skip the preamble; stop reading as soon as a short line is seen. */
    while(!ok && ajReadlineTrim(inf, &line))
        ok = (ajStrGetCharPos(line, 1) == '\0');

    if(!ok)
    {
        ajWarn("\n***********************************************\n"
               "* STAMP was called but output file was EMPTY! *\n"
               "* NO OUTPUT FILE GENERATED FOR THIS NODE. *\n"
               "***********************************************\n");
        ajFmtPrintF(logf, "STAMP called but output file empty. "
                    "No output file for this node!");
    }
    else
    {
        outf = ajFileNewOutNameS(out);

        if(!outf)
            ajFatal("Could not open output file in domainalign_ProcessStampFile");

        /* Classification records, then selection of the node identifier. */
        if(domain->Type == ajSCOP)
        {
            ajFmtPrintF(outf, "# TY SCOP\n# XX\n");
            ajFmtPrintF(outf, "# CL %S", domain->Scop->Class);
            ajFmtPrintSplit(outf, domain->Scop->Fold,
                            "\n# XX\n# FO ", 75, " \t\n\r");
            ajFmtPrintSplit(outf, domain->Scop->Superfamily,
                            "# XX\n# SF ", 75, " \t\n\r");
            ajFmtPrintSplit(outf, domain->Scop->Family,
                            "# XX\n# FA ", 75, " \t\n\r");
            ajFmtPrintF(outf, "# XX\n");

            switch(noden)
            {
                case 1:
                    nodeid = domain->Scop->Sunid_Class;
                    break;
                case 2:
                    nodeid = domain->Scop->Sunid_Fold;
                    break;
                case 3:
                    nodeid = domain->Scop->Sunid_Superfamily;
                    break;
                case 4:
                    nodeid = domain->Scop->Sunid_Family;
                    break;
                default:
                    nodeok = ajFalse;
                    break;
            }
        }
        else
        {
            ajFmtPrintF(outf, "# TY CATH\n# XX\n");
            ajFmtPrintF(outf, "# CL %S", domain->Cath->Class);
            ajFmtPrintSplit(outf, domain->Cath->Architecture,
                            "\n# XX\n# AR ", 75, " \t\n\r");
            ajFmtPrintSplit(outf, domain->Cath->Topology,
                            "# XX\n# TP ", 75, " \t\n\r");
            ajFmtPrintSplit(outf, domain->Cath->Superfamily,
                            "# XX\n# SF ", 75, " \t\n\r");
            ajFmtPrintF(outf, "# XX\n");

            switch(noden)
            {
                case 5:
                    nodeid = domain->Cath->Class_Id;
                    break;
                case 6:
                    nodeid = domain->Cath->Arch_Id;
                    break;
                case 7:
                    nodeid = domain->Cath->Topology_Id;
                    break;
                case 8:
                    nodeid = domain->Cath->Superfamily_Id;
                    break;
                case 9:
                    nodeid = domain->Cath->Family_Id;
                    break;
                default:
                    nodeok = ajFalse;
                    break;
            }
        }

        /* No trailing newline: the next record written starts with one. */
        if(nodeok)
            ajFmtPrintF(outf, "# SI %d\n# XX", nodeid);
        else
            ajFatal("Node number error in domainalign_ProcessStampFile");

        while(ajReadlineTrim(inf, &line))
        {
            /* A short line ends the current block; blocks cycle 1,2,3. */
            if(ajStrGetCharPos(line, 1) == '\0')
            {
                blk = (blk == 3) ? 1 : blk + 1;
                continue;
            }

            switch(blk)
            {
                case 1:
                    /* Numbering line and sequences. */
                    if(ajStrPrefixC(line, "Number"))
                    {
                        ajFmtPrintF(outf, "\n# %7s %S\n", " ", line);
                        break;
                    }

                    /* Keep only the 7-character domain identifier. */
                    ajFmtScanS(line, "%S", &word);
                    ajStrAssignSubS(&domid, word, 0, 6);

                    /* Sequence columns; blanks become 'X', upper case. */
                    ajStrAssignSubS(&text, line, 13, 69);
                    ajStrExchangeSetCC(&text, " ", "X");
                    ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
                                "with 'X'\n");
                    ajStrFmtUpper(&text);

                    ajFmtPrintF(outf, "%-15S%7d %S%7d\n",
                                domid, 0, text, 0);
                    break;

                case 2:
                    /* Secondary structure block is not wanted. */
                    break;

                default:
                    /* Similarity block: only "Post" lines are kept. */
                    if(ajStrPrefixC(line, "Post"))
                    {
                        ajStrAssignSubS(&text, line, 13, 69);
                        ajFmtPrintF(outf, "%-15s%7s %S\n",
                                    "# Post_similar", " ", text);
                    }
                    break;
            }
        }
    }

    /* Release files and strings. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&word);
    ajStrDel(&domid);
    ajStrDel(&text);

    return;
}
AjBool ajDebugTest(const char* token) { AjPStr filename = NULL; const char* debugtestname = ".debugtest"; char* ctoken = NULL; AjPStr line = NULL; AjPStr strtoken = NULL; AjPStr rest = NULL; static ajint depth = 0; struct { ajuint count; ajuint max; } *stats; if(depth) return ajFalse; depth++; if(!messDebugTestInit) { filename = ajStrNewC(debugtestname); if(ajFilenameExists(filename)) { messDebugTestFile = ajFileNewInNameS(filename); } else { ajFmtPrintS(&filename, "%s%s%s", getenv("HOME"), SLASH_STRING, debugtestname); if(ajFilenameExists(filename)) messDebugTestFile = ajFileNewInNameS(filename); } ajStrDel(&filename); if(messDebugTestFile) { messDebugTestTable = ajTablecharNewLen(256); while(ajReadlineTrim(messDebugTestFile, &line)) { if(ajStrExtractFirst(line, &rest, &strtoken)) { AJNEW0(stats); ctoken = ajCharNewS(strtoken); if(ajStrIsInt(rest)) ajStrToUint(rest, &stats->max); else stats->max = UINT_MAX; ajTablePut(messDebugTestTable, ctoken, stats); ctoken = NULL; stats = NULL; } } ajStrDel(&line); ajStrDel(&strtoken); ajStrDel(&rest); ajFileClose(&messDebugTestFile); } messDebugTestInit = ajTrue; } depth--; if(!messDebugTestTable) return ajFalse; depth++; stats = ajTableFetch(messDebugTestTable, token); depth--; if(!stats) return ajFalse; if(!stats->max) return ajTrue; if(stats->count++ >= stats->max) return ajFalse; return ajTrue; }
static void jaspscan_ParseInput(const AjPStr dir, const AjPStr jaspdir, const AjPStr mats, const AjPStr excl, ajuint *recurs, AjPList ret) { ajuint nm = 0; ajuint ne = 0; AjPStr *carr = NULL; AjPStr *earr = NULL; AjPFile inf = NULL; AjPStr line = NULL; AjPStr comm = NULL; AjPStr val = NULL; ajuint i; ajuint j; char c; ajuint rlen = 0; if(*recurs > JASPSCAN_RECURS) ajFatal("Too many recursion levels in matrix list files"); line = ajStrNew(); comm = ajStrNew(); if(mats) { nm = ajArrCommaList(mats,&carr); for(i=0; i < nm; ++i) { if(ajStrGetCharFirst(carr[i]) != '@') ajStrFmtUpper(&carr[i]); if(ajStrMatchC(carr[i],"ALL")) { jaspscan_GetFileList(dir, jaspdir, "*", ret); ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); } else if(ajStrGetCharFirst(carr[i]) == '@') { ajStrTrimStartC(&carr[i],"@"); inf = ajFileNewInNameS(carr[i]); if(!inf) ajFatal("Cannot open list file %S",carr[i]); while(ajReadlineTrim(inf,&line)) { ajStrRemoveWhite(&line); c = ajStrGetCharFirst(line); if(c == '#' || c== '!') continue; if(ajStrGetLen(comm)) ajStrAppendC(&comm,","); ajStrFmtUpper(&line); ajStrAppendS(&comm,line); } *recurs += 1; jaspscan_ParseInput(dir,jaspdir,comm,NULL,recurs,ret); *recurs -= 1; ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); ajFileClose(&inf); } else { jaspscan_GetFileList(dir,jaspdir,ajStrGetPtr(carr[i]),ret); ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); } } for(i=0; i < nm; ++i) ajStrDel(&carr[i]); AJFREE(carr); } if(excl) { ne = ajArrCommaList(excl,&earr); for(i=0; i < ne; ++i) { if(ajStrGetCharFirst(earr[i]) != '@') ajStrFmtUpper(&earr[i]); if(ajStrGetCharFirst(earr[i]) == '@') { ajStrTrimStartC(&earr[i],"@"); inf = ajFileNewInNameS(earr[i]); if(!inf) ajFatal("Cannot open list file %S",earr[i]); while(ajReadlineTrim(inf,&line)) { ajStrRemoveWhite(&line); c = ajStrGetCharFirst(line); if(c == '#' || c== '!') continue; if(ajStrGetLen(comm)) ajStrAppendC(&comm,","); ajStrFmtUpper(&line); ajStrAppendS(&comm,line); } *recurs += 1; 
jaspscan_ParseInput(dir,jaspdir,NULL,comm,recurs,ret); *recurs -= 1; ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); ajFileClose(&inf); } else { ajStrAssignS(&line,earr[i]); ajStrAppendC(&line,J_EXT); rlen = ajListGetLength(ret); for(j=0; j < rlen; ++j) { ajListPop(ret,(void **)&val); if(ajStrSuffixS(val,line)) ajStrDel(&val); else ajListPushAppend(ret,(void *)val); } } } for(i=0; i < ne; ++i) ajStrDel(&earr[i]); AJFREE(earr); } ajStrDel(&line); ajStrDel(&comm); return; }
int main(int argc, char **argv) { embInitPV("kdnapenny", argc, argv, "KBWS", "1.0.8"); struct soap soap; char* jobid; char* result; AjPSeqall seqall; AjPSeq seq; AjPFile outf; AjPStr substr; AjPStr inseq = NULL; seqall = ajAcdGetSeqall("seqall"); outf = ajAcdGetOutfile("outfile"); AjPStr tmp = NULL; AjPStr tmpFileName = NULL; AjPSeqout fil_file; AjPStr line = NULL; AjPStr sizestr = NULL; ajint thissize = 0; ajint nb = 0; AjBool are_prot = ajFalse; ajint size = 0; AjPFile infile; tmp = ajStrNewC("fasta"); fil_file = ajSeqoutNew(); tmpFileName = getUniqueFileName(); if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) { embExitBad(); } ajSeqoutSetFormatS(fil_file, tmp); while (ajSeqallNext(seqall, &seq)) { if (!nb) { are_prot = ajSeqIsProt(seq); } ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); ajSeqoutDel(&fil_file); if (nb < 2) { ajFatal("Multiple alignments need at least two sequences"); } infile = ajFileNewInNameS(tmpFileName); while (ajReadline(infile, &line)) { ajStrAppendS(&inseq,line); ajStrAppendC(&inseq,"\n"); } soap_init(&soap); char* in0; in0 = ajCharNewS(inseq); if ( soap_call_ns1__runDnapenny( &soap, NULL, NULL, in0, &jobid ) == SOAP_OK ) { fprintf(stderr,"Jobid: %s\n",jobid); } else { soap_print_fault(&soap, stderr); } int check = 0; while ( check == 0 ) { if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid, &check ) == SOAP_OK ) { fprintf(stderr,"*"); } else { soap_print_fault(&soap, stderr); } sleep(3); } fprintf(stderr, "\n"); if ( soap_call_ns1__getResult( &soap, NULL, NULL, jobid, &result ) == SOAP_OK ) { substr = ajStrNewC(result); ajFmtPrintF(outf,"%S\n",substr); } else { soap_print_fault(&soap, stderr); } ajSysFileUnlinkS(tmpFileName); soap_destroy(&soap); soap_end(&soap); soap_done(&soap); ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); embExit(); return 0; }
/* @prog domainreso ***********************************************************
**
** Removes low resolution domains from a DCF file (domain
** classification file).
**
******************************************************************************/
int main(ajint argc, char **argv)
{
    AjPList   cpdb_path = NULL;  /* Location of coordinate files for input */
    AjPStr    temp      = NULL;  /* Name of the current coordinate file    */
    AjPStr    temp2     = NULL;  /* Pdb code being added to the list       */
    AjPList   entry     = NULL;  /* List of pdb codes with resolution      */
                                 /* ABOVE the threshold                    */
    AjPStr   *entryarr  = NULL;  /* entry as an array                      */

    AjPFile   fptr_cpdb = NULL;  /* Pointer to current coordinate file     */
    AjPFile   dcfin     = NULL;  /* DCF input file                         */
    AjPFile   dcfout    = NULL;  /* DCF output file                        */

    AjPPdb    pdb       = NULL;  /* Pdb object pointer                     */
    AjPDomain domain    = NULL;  /* Domain structure                       */

    float     threshold = 0.0;   /* Resolution threshold                   */
    ajint     num       = 0;     /* Number of codes in the entry list      */
    ajint     type      = 0;     /* Type of domain (ajSCOP or ajCATH) in
                                    the DCF file                           */
    ajint     i         = 0;     /* Loop counter                           */


    /* Read data from acd */
    embInitPV("domainreso",argc,argv,"DOMAINATRIX",VERSION);

    cpdb_path = ajAcdGetDirlist("cpdbpath");
    threshold = ajAcdGetFloat("threshold");
    dcfin     = ajAcdGetInfile("dcfinfile");
    dcfout    = ajAcdGetOutfile("dcfoutfile");

    /* Create list . */
    entry = ajListNew();

    /*
    ** domainreso reads every clean coordinate file named in the cpdbpath
    ** list and extracts the resolution of the structure. If the value is
    ** ABOVE the (user defined) threshold, the pdb code is pushed onto a
    ** list. The DCF file (domain classification file) is then read and the
    ** domain identifiers are looked up in that list; only domains that are
    ** NOT found are written to the new DCF file.
    */

    type = ajDomainDCFType(dcfin);

    /* Start of main application loop */
    /* Produce list of pdb codes with resolution */
    /* ABOVE the threshold. */
    while(ajListPop(cpdb_path,(void **)&temp))
    {
        /* Open coordinate file. */
        if((fptr_cpdb=ajFileNewInNameS(temp))==NULL)
        {
            ajWarn("Could not open cpdb file");
            ajStrDel(&temp);
            continue;
        }

        ajFmtPrint("%S\n", temp);
        fflush(stdout);

        /* Read coordinate data file. */
        pdb = ajPdbReadFirstModelNew(fptr_cpdb);

        /* A file that cannot be parsed is skipped rather than dereferenced. */
        if(!pdb)
        {
            ajWarn("Could not read cpdb file");
            ajFileClose(&fptr_cpdb);
            ajStrDel(&temp);
            continue;
        }

        /* Check if resolution is above threshold. */
        if(pdb->Reso > threshold)
        {
            /* assign ID to list; the list now owns the string. */
            temp2=ajStrNew();
            ajStrAssignS(&temp2, pdb->Pdb);
            ajListPush(entry, (AjPStr) temp2);
            temp2 = NULL;
        }

        /* Close coordinate file and tidy up*/
        ajPdbDel(&pdb);
        ajFileClose(&fptr_cpdb);
        ajStrDel(&temp);
    }

    num = ajListGetLength(entry);

    /* Sort the list of pdb codes & convert to an array. */
    ajListSort(entry, domainreso_StrComp);
    ajListToarray(entry, (void ***)&entryarr);

    /* Read DCF file and compare IDs to those in list
       if not present then write domain structure data to output. */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
        /* DOMAIN id not found in the list of domains with resolution
           above the threshold, so include it in the output file. */
        if((domainreso_StrBinSearchDomain(ajDomainGetId(domain),
                                          entryarr, num))==-1)
            ajDomainWrite(dcfout, domain);

        /* Delete domain structure. */
        ajDomainDel(&domain);
    }

    /* Tidy up. The array holds the same string pointers as the list,
       so every code is released here exactly once. */
    for(i = 0; i < num; i++)
        ajStrDel(&entryarr[i]);

    ajFileClose(&dcfout);
    ajFileClose(&dcfin);
    ajListFree(&cpdb_path);
    ajListFree(&entry);
    AJFREE(entryarr);

    /* Return. */
    ajExit();
    return 0;
}
int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); 
ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { 
ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }
int main(int argc, char **argv) { embInitPV("kclustalw", argc, argv, "KBWS", "1.0.8"); struct soap soap; struct ns1__clustalwInputParams params; char* jobid; char* result; AjPSeqall seqall; AjPSeq seq; AjPFile outf; AjPFile outf_dnd; AjPStr substr; AjPStr inseq = NULL; AjPStr alignment; AjPStr output; AjPStr matrix; AjPStr outorder; ajint ktup; ajint window; ajint gapopen; float gapext; ajint gapdist; AjBool endgaps; ajint pairgap; ajint topdiags; AjPStr score; AjBool tossgaps; AjBool kimura; AjPStr outputtree; AjBool tree; AjBool quicktree; AjBool align; AjPStr clustering; ajint numiter; AjPStr iteration; alignment = ajAcdGetString("alignment"); output = ajAcdGetString("output"); matrix = ajAcdGetString("matrix"); outorder = ajAcdGetString("outorder"); ktup = ajAcdGetInt("ktup"); window = ajAcdGetInt("window"); gapopen = ajAcdGetInt("gapopen"); gapext = ajAcdGetFloat("gapext"); gapdist = ajAcdGetInt("gapdist"); endgaps = ajAcdGetBoolean("endgaps"); pairgap = ajAcdGetInt("pairgap"); topdiags = ajAcdGetInt("topdiags"); score = ajAcdGetString("score"); tossgaps = ajAcdGetBoolean("tossgaps"); kimura = ajAcdGetBoolean("kimura"); outputtree = ajAcdGetString("outputtree"); tree = ajAcdGetBoolean("tree"); quicktree = ajAcdGetBoolean("quicktree"); align = ajAcdGetBoolean("align"); clustering = ajAcdGetString("clustering"); numiter = ajAcdGetInt("numiter"); iteration = ajAcdGetString("iteration"); seqall = ajAcdGetSeqall("seqall"); outf = ajAcdGetOutfile("outfile"); outf_dnd = ajAcdGetOutfile("dndoutfile"); params.alignment = ajCharNewS(alignment); params.output = ajCharNewS(output); params.matrix = ajCharNewS(matrix); params.outorder = ajCharNewS(outorder); params.ktup = ktup; params.window = window; params.gapopen = gapopen; params.gapext = gapext; params.gapdist = gapdist; if (endgaps) { params.endgaps = xsd__boolean__true_; } else { params.endgaps = xsd__boolean__false_; } params.pairgap = pairgap; params.topdiags = topdiags; params.score = ajCharNewS(score); if 
(tossgaps) { params.tossgaps = xsd__boolean__true_; } else { params.tossgaps = xsd__boolean__false_; } if (kimura) { params.kimura = xsd__boolean__true_; } else { params.kimura = xsd__boolean__false_; } params.outputtree = ajCharNewS(outputtree); if (tree) { params.tree = xsd__boolean__true_; } else { params.tree = xsd__boolean__false_; } if (quicktree) { params.quicktree = xsd__boolean__true_; } else { params.quicktree = xsd__boolean__false_; } if (align) { params.align = xsd__boolean__true_; } else { params.align = xsd__boolean__false_; } params.clustering = ajCharNewS(clustering); params.numiter = numiter; params.iteration = ajCharNewS(iteration); AjPStr tmp = NULL; AjPStr tmpFileName = NULL; AjPSeqout fil_file; AjPStr line = NULL; /* if "AjPStr line; -> ajReadline is not success!" */ AjPStr sizestr = NULL; ajint thissize; ajint nb = 0; AjBool are_prot = ajFalse; ajint size = 0; AjPFile infile; tmp = ajStrNewC("fasta"); fil_file = ajSeqoutNew(); tmpFileName = getUniqueFileName(); if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) { embExitBad(); } ajSeqoutSetFormatS(fil_file, tmp); while (ajSeqallNext(seqall, &seq)) { if (!nb) { are_prot = ajSeqIsProt(seq); } ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); ajSeqoutDel(&fil_file); if (nb < 2) { ajFatal("Multiple alignments need at least two sequences"); } infile = ajFileNewInNameS(tmpFileName); while (ajReadline(infile, &line)) { ajStrAppendS(&inseq,line); ajStrAppendC(&inseq,"\n"); } soap_init(&soap); char* in0; in0 = ajCharNewS(inseq); if ( soap_call_ns1__runClustalw( &soap, NULL, NULL, in0, ¶ms, &jobid ) == SOAP_OK ) { fprintf(stderr,"Jobid: %s\n",jobid); } else { soap_print_fault(&soap, stderr); } int check = 0; while ( check == 0 ) { if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid, &check ) == SOAP_OK ) { fprintf(stderr,"*"); } else { soap_print_fault(&soap, stderr); } sleep(3); } fprintf(stderr,"\n"); char* type; type = "out"; if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, 
jobid, type, &result )== SOAP_OK) { substr = ajStrNewC(result); fprintf(stdout, "%s\n", ajStrGetPtr(substr)); } else { soap_print_fault(&soap, stderr); } type = "aln"; if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) { substr = ajStrNewC(result); ajFmtPrintF(outf,"%S\n",substr); } else { soap_print_fault(&soap, stderr); } type = "dnd"; if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) { substr = ajStrNewC(result); ajFmtPrintF(outf_dnd,"%S\n",substr); } else { soap_print_fault(&soap, stderr); } ajSysFileUnlinkS(tmpFileName); soap_destroy(&soap); soap_end(&soap); soap_done(&soap); ajFileClose(&outf_dnd); ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); embExit(); return 0; }
/* pdbparse main ************************************************************
**
** For every file name in the "pdbpath" directory list: open the raw pdb
** file, parse it with ajPdbReadRawNew and write the resulting structure
** with ajPdbWriteAll to a clean coordinate file (ccf) opened in the
** "ccfoutdir" output directory.
**
** Each processed file name is written to the log file. A file that cannot
** be opened, parsed or written is reported with a warning and a tagged log
** line (FILE_OPEN / NO_OUTPUT / FILE_WRITE) and the loop moves on to the
** next file. A file name from which no 4-character pdb id can be taken is
** fatal.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] Always 0
****************************************************************************/
int main(ajint argc, char **argv)
{
    AjPList   pdb_path   = NULL;   /* Path of pdb files                    */
    AjPDirout ccf_path   = NULL;   /* Path of ccf files                    */
    AjPStr    ccf_name   = NULL;   /* Name of ccf file                     */
    AjPStr    pdbid      = NULL;   /* PDB code                             */
    AjPStr    pdbid_temp = NULL;   /* Scratch copy while trimming pdbid    */

    /* True == use the pdbid code to name the output file,
       False == use the name of the original pdb file */
    AjBool    ccfnaming  = ajFalse;

    /* Mask non-amino acid groups in protein chains that do not contain a
       C-alpha atom. The group will not appear in either the CO or SQ
       records of the clean coordinate file */
    AjBool    camask     = ajFalse;

    /* Mask amino acids in protein chains that do not contain a C-alpha
       atom. The amino acid will not appear in the CO record but will
       still be present in the SQ record of the clean coordinate file */
    AjBool    camask1    = ajFalse;

    /* Mask residues or groups in protein chains with a single atom only */
    AjBool    atommask   = ajFalse;

    /* Name of the pdb file currently being processed. It is filled by
       ajListPop and deleted at the end of every iteration, so it is NOT
       pre-allocated here (doing so leaked the initial string). */
    AjPStr    temp       = NULL;

    AjPFile   pdb_inf    = NULL;   /* pdb input file pointer               */
    AjPFile   ccf_outf   = NULL;   /* ccf output file pointer              */
    AjPFile   logf       = NULL;   /* log file pointer                     */

    AjPPdb    pdb        = NULL;   /* Pdb structure (for parsed data)      */

    ajint     min_chain_size = 0;  /* Minimum length of a SEQRES chain for
                                      it to be parsed                      */
    ajint     max_mismatch   = 0;  /* Maximum number of permissible
                                      mismatches between the ATOM and
                                      SEQRES sequences                     */
    ajint     max_trim       = 0;  /* Max. no. residues to trim when
                                      checking for missing C-terminal
                                      SEQRES residues                      */


    /* Initialise strings */
    ccf_name   = ajStrNew();
    pdbid      = ajStrNew();
    pdbid_temp = ajStrNew();


    /* Read data from acd */
    embInitPV("pdbparse",argc,argv,"STRUCTURE",VERSION);

    pdb_path       = ajAcdGetDirlist("pdbpath");
    ccf_path       = ajAcdGetOutdir("ccfoutdir");
    logf           = ajAcdGetOutfile("logfile");
    min_chain_size = ajAcdGetInt("chnsiz");
    max_mismatch   = ajAcdGetInt("maxmis");
    max_trim       = ajAcdGetInt("maxtrim");
    ccfnaming      = ajAcdGetBoolean("ccfnaming");
    camask         = ajAcdGetBoolean("camask");
    camask1        = ajAcdGetBoolean("camaska");
    atommask       = ajAcdGetBoolean("atommask");


    /* Start of main application loop */
    while(ajListPop(pdb_path,(void **)&temp))
    {
        ajFmtPrint("Processing %S\n", temp);
        ajFmtPrintF(logf, "%S\n", temp);


        /* Read pdb file */
        if((pdb_inf=ajFileNewInNameS(temp))==NULL)
        {
            /* Warn with a literal format: the file name must never be
               used as the format string itself. */
            ajWarn("Could not open for reading %S ", temp);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", temp);
            ajStrDel(&temp);
            continue;
        }


        /* Assign pdb id code from file name */
        ajStrAssignS(&pdbid, temp);
        ajFilenameTrimPathExt(&pdbid);

        if(MAJSTRGETLEN(pdbid)>4)
        {
            /* The file name is longer than expected (and probably
               contains a prefix). Take the last four characters to be
               the pdbid code: positions -4..-1 (the original passed
               pos-4 / pos-1 with pos always 0). */
            ajStrAssignSubS(&pdbid_temp, pdbid, -4, -1);
            ajStrAssignS(&pdbid, pdbid_temp);
        }
        else if(MAJSTRGETLEN(pdbid)<4)
            ajFatal("Could not determine pdbid code from file name (%S)",
                    pdbid);


        /* Parse pdb file and write pdb structure */
        if(!(pdb=ajPdbReadRawNew(pdb_inf, pdbid, min_chain_size,
                                 max_mismatch, max_trim, camask, camask1,
                                 atommask, logf)))
        {
            ajWarn("Clean coordinate file not generated for %S", temp);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "NO_OUTPUT", temp);

            ajFileClose(&pdb_inf);
            ajStrDel(&temp);
            continue;
        }


        /* Open clean coordinate file for writing */
        if(ccfnaming)
            ajStrAssignS(&ccf_name, pdb->Pdb);
        else
            ajStrAssignS(&ccf_name, temp);

        ajStrFmtLower(&ccf_name);

        if(!(ccf_outf=ajFileNewOutNameDirS(ccf_name, ccf_path)))
        {
            ajWarn("Could not open %S for writing", ccf_name);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", ccf_name);

            ajFileClose(&pdb_inf);
            ajPdbDel(&pdb);
            ajStrDel(&temp);
            continue;
        }


        /* Write pdb file */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("Could not write file %S", ccf_name);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_WRITE", ccf_name);

            /* Remove the partial output file. temp is reused to hold
               the command; it is deleted below as usual.
               NOTE(review): this runs a shell command built from a file
               name, while ccf_outf is still open, and names ccf_name
               rather than the path opened under ccf_path - confirm it
               removes the intended file. */
            ajFmtPrintS(&temp, "rm %S", ccf_name);
            ajFmtPrint("%S", temp);
            ajSysSystem(temp);

            ajFileClose(&pdb_inf);
            ajFileClose(&ccf_outf);
            ajPdbDel(&pdb);
            ajStrDel(&temp);
            continue;
        }


        /* Tidy up */
        ajFileClose(&pdb_inf);
        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&temp);

        ajFmtPrintF(logf, "//\n");
    }
    /* End of main application loop */


    /* Tidy up */
    ajListFree(&pdb_path);
    ajDiroutDel(&ccf_path);
    ajStrDel(&ccf_name);
    ajStrDel(&pdbid);
    ajStrDel(&pdbid_temp);

    ajFileClose(&logf);


    /* Return */
    ajExit();

    return 0;
}
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. 
*/ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. 
*/ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. 
*/ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }