/* pdbparse entry point.
**
** Pops each file name from the "pdbpath" directory list, parses the raw
** PDB file with ajPdbReadRawNew and writes the parsed structure with
** ajPdbWriteAll into the "ccfoutdir" output directory.  A file that cannot
** be opened, parsed or written is reported with ajWarn, recorded in the
** log file and skipped.  A file name shorter than four characters (after
** the path and extension are trimmed) is fatal.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char **] Command-line arguments
** @return [int] 0 (after ajExit)
*/
int main(ajint argc, char **argv)
{
    AjPList   pdb_path   = NULL;   /* List of pdb (input) file names       */
    AjPDirout ccf_path   = NULL;   /* Path of ccf files                    */
    AjPStr    ccf_name   = NULL;   /* Name of ccf file                     */
    AjPStr    pdbid      = NULL;   /* PDB code                             */
    AjPStr    pdbid_temp = NULL;   /* Scratch copy of the PDB code         */

    /* True == use the pdbid code to name the output file,
       False == use the name of the original pdb file. */
    AjBool    ccfnaming  = ajFalse;

    /* Mask non-amino acid groups in protein chains that do not contain a
       C-alpha atom.  The group will not appear in either the CO or SQ
       records of the clean coordinate file. */
    AjBool    camask     = ajFalse;

    /* Mask amino acids in protein chains that do not contain a C-alpha
       atom.  The amino acid will not appear in the CO record but will
       still be present in the SQ record of the clean coordinate file. */
    AjBool    camask1    = ajFalse;

    /* Mask residues or groups in protein chains with a single atom only. */
    AjBool    atommask   = ajFalse;

    AjPStr    temp       = NULL;   /* Current input file name (owned once
                                      popped from pdb_path); reused as a
                                      scratch command string on write
                                      failure                              */
    AjPFile   pdb_inf    = NULL;   /* pdb input file pointer               */
    AjPFile   ccf_outf   = NULL;   /* ccf output file pointer              */
    AjPFile   logf       = NULL;   /* log file pointer                     */
    AjPPdb    pdb        = NULL;   /* Pdb structure (for parsed data)      */

    ajint     min_chain_size = 0;  /* Minimum length of a SEQRES chain for
                                      it to be parsed                      */
    ajint     max_mismatch   = 0;  /* Maximum number of permissible
                                      mismatches between the ATOM and
                                      SEQRES sequences                     */
    ajint     max_trim       = 0;  /* Max. no. residues to trim when
                                      checking for missing C-terminal
                                      SEQRES residues                      */
    ajint     pos            = 0;  /* Length of the trimmed file name      */


    /* Initialise strings.  'temp' is deliberately NOT allocated here:
       ajListPop overwrites the pointer, so a fresh string would leak. */
    ccf_name   = ajStrNew();
    pdbid      = ajStrNew();
    pdbid_temp = ajStrNew();


    /* Read data from acd */
    embInitPV("pdbparse", argc, argv, "STRUCTURE", VERSION);

    pdb_path       = ajAcdGetDirlist("pdbpath");
    ccf_path       = ajAcdGetOutdir("ccfoutdir");
    logf           = ajAcdGetOutfile("logfile");
    min_chain_size = ajAcdGetInt("chnsiz");
    max_mismatch   = ajAcdGetInt("maxmis");
    max_trim       = ajAcdGetInt("maxtrim");
    ccfnaming      = ajAcdGetBoolean("ccfnaming");
    camask         = ajAcdGetBoolean("camask");
    camask1        = ajAcdGetBoolean("camaska");
    atommask       = ajAcdGetBoolean("atommask");


    /* Start of main application loop */
    while(ajListPop(pdb_path, (void **) &temp))
    {
        ajFmtPrint("Processing %S\n", temp);
        ajFmtPrintF(logf, "%S\n", temp);

        /* Read pdb file */
        if((pdb_inf = ajFileNewInNameS(temp)) == NULL)
        {
            /* The file name is passed as an argument, never as the
               format string itself. */
            ajWarn("Could not open for reading %S ", temp);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", temp);
            ajStrDel(&temp);
            continue;
        }

        /* Assign pdb id code from file name */
        ajStrAssignS(&pdbid, temp);
        ajFilenameTrimPathExt(&pdbid);

        pos = (ajint) MAJSTRGETLEN(pdbid);

        if(pos > 4)
        {
            /* The file name is longer than expected (and probably
               contains a prefix).  Take the last four characters to be
               the pdbid code. */
            ajStrAssignSubS(&pdbid_temp, pdbid, pos-4, pos-1);
            ajStrAssignS(&pdbid, pdbid_temp);
        }
        else if(pos < 4)
            ajFatal("Could not determine pdbid code from file name (%S)",
                    pdbid);

        /* Parse pdb file and write pdb structure */
        if(!(pdb = ajPdbReadRawNew(pdb_inf, pdbid, min_chain_size,
                                   max_mismatch, max_trim, camask,
                                   camask1, atommask, logf)))
        {
            ajWarn("Clean coordinate file not generated for %S", temp);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "NO_OUTPUT", temp);

            ajFileClose(&pdb_inf);
            ajStrDel(&temp);
            continue;
        }

        /* Open clean coordinate file for writing */
        if(ccfnaming)
            ajStrAssignS(&ccf_name, pdb->Pdb);
        else
            ajStrAssignS(&ccf_name, temp);

        ajStrFmtLower(&ccf_name);

        if(!(ccf_outf = ajFileNewOutNameDirS(ccf_name, ccf_path)))
        {
            ajWarn("Could not open %S for writing", ccf_name);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", ccf_name);

            ajFileClose(&pdb_inf);
            ajPdbDel(&pdb);
            ajStrDel(&temp);
            continue;
        }

        /* Write pdb file */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("Could not write file %S", ccf_name);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_WRITE", ccf_name);

            /* Remove the partial output.  'temp' is reused to hold the
               command line; it is freed below either way.
               NOTE(review): ccf_name is used without the output
               directory and is handed to a shell unquoted - confirm this
               removes the intended file. */
            ajFmtPrintS(&temp, "rm %S", ccf_name);
            ajFmtPrint("%S", temp);
            ajSysSystem(temp);

            ajFileClose(&pdb_inf);
            ajFileClose(&ccf_outf);
            ajPdbDel(&pdb);
            ajStrDel(&temp);
            continue;
        }

        /* Tidy up */
        ajFileClose(&pdb_inf);
        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&temp);

        ajFmtPrintF(logf, "//\n");
    }
    /* End of main application loop */


    /* Tidy up */
    ajListFree(&pdb_path);
    ajDiroutDel(&ccf_path);
    ajStrDel(&ccf_name);
    ajStrDel(&pdbid);
    ajStrDel(&pdbid_temp);

    ajFileClose(&logf);

    /* Return */
    ajExit();
    return 0;
}
int main(int argc, char **argv) { AjPList sigin = NULL; /* Signature input file names. */ AjPStr signame = NULL; /* Name of signature file. */ AjPFile sigf = NULL; /* Signature input file. */ EmbPSignature sig = NULL; /* Signature. */ AjPList siglist = NULL; /* List of signatures. */ AjIList sigiter = NULL; /* Iterator for siglist. */ AjBool sigok = ajFalse; /* True if signature processed ok. */ EmbPHit hit = NULL; /* Hit to store signature-sequence match. */ AjPList hits = NULL; /* List of hits */ AjPList ligands = NULL; /* List of top-scoring ligands. */ AjPSeqall database=NULL; /* Protein sequences to match signature against. */ AjPSeq seq = NULL; /* Current sequence. */ AjPMatrixf sub =NULL; /* Residue substitution matrix. */ float gapo =0.0; /* Gap insertion penalty. */ float gape =0.0; /* Gap extension penalty. */ AjPStr nterm=NULL; /* Holds N-terminal matching options from acd. */ ajint ntermi=0; /* N-terminal option as int. */ AjPFile hitsf =NULL; /* Hits output file. sequence matches. */ AjPDirout hitsdir=NULL; /* Directory of hits files (output). */ AjPFile alignf =NULL; /* Alignment output file. */ AjPDirout aligndir=NULL; /* Directory of alignment files (output). */ AjPFile resultsf =NULL; /* Results file (output). */ AjPDirout resultsdir=NULL; /* Directory of results files (output). */ AjPStr mode = NULL; /* Mode, 1: Patch score mode, 2: Site score mode. */ ajint modei = 0; /* Selected mode as integer. */ SigPLighit lighit = NULL; embInitPV("sigscanlig", argc, argv, "SIGNATURE",VERSION); /* GET VALUES FROM ACD */ sigin = ajAcdGetDirlist("siginfilesdir"); database = ajAcdGetSeqall("dbseqall"); sub = ajAcdGetMatrixf("sub"); gapo = ajAcdGetFloat("gapo"); gape = ajAcdGetFloat("gape"); nterm = ajAcdGetListSingle("nterm"); hitsdir = ajAcdGetOutdir("hitsoutdir"); aligndir = ajAcdGetOutdir("alignoutdir"); resultsdir = ajAcdGetOutdir("resultsoutdir"); mode = ajAcdGetListSingle("mode"); /*Assign N-terminal matching option etc. 
*/ ajFmtScanS(nterm, "%d", &ntermi); modei = (ajint) ajStrGetCharFirst(mode)-48; /* READ & PROCESS SIGNATURES */ siglist = ajListNew(); while(ajListPop(sigin, (void **) &signame)) { /* Read signature files, compile signatures and populate list. */ sigok = ajFalse; if((sigf = ajFileNewInNameS(signame))) if((sig = embSignatureReadNew(sigf))) if(embSignatureCompile(&sig, gapo, gape, sub)) { sigok=ajTrue; ajListPushAppend(siglist, sig); /* ajFmtPrint("Id: %S\nDomid: %S\nLigid: %S\nns: %d\n" "sn: %d\nnp: %d\npn: %d\nminpatch: %d\n" "maxgap: %d\n", sig->Id, sig->Domid, sig->Ligid, sig->ns, sig->sn, sig->np, sig->pn, sig->minpatch, sig->maxgap); */ } if(!sigok) { ajWarn("Could not process %S", signame); embSignatureDel(&sig); ajFileClose(&sigf); ajStrDel(&signame); continue; } ajFileClose(&sigf); ajStrDel(&signame); } ajListFree(&sigin); /* ALIGN EACH QUERY SEQUENCE TO LIST OF SIGNATURE */ while(ajSeqallNext(database, &seq)) { /* Do sequence-signature alignment and save results */ hits = ajListNew(); sigiter = ajListIterNew(siglist); while((sig = (EmbPSignature) ajListIterGet(sigiter))) { if(embSignatureAlignSeq(sig, seq, &hit, ntermi)) { hit->Sig = sig; ajListPushAppend(hits, hit); hit=NULL; /* To force reallocation by embSignatureAlignSeq */ } /* There has to be a hit for each signature for correct generation of the LHF by sigscanlig_WriteFasta. So push an empty hit if necessary. 'hit'=NULL forces reallocation by embSignatureAlignSeq. 
*/ /* else { hit = embHitNew(); ajListPushAppend(hits, hit); hit=NULL; } */ } ajListIterDel(&sigiter); /* Rank-order the list of hits by score */ ajListSort(hits, embMatchinvScore); /* Write ligand hits & alignment files (output) */ hitsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), hitsdir); alignf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), aligndir); resultsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), resultsdir); /* if((!sigscanlig_WriteFasta(hitsf, siglist, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); */ if((!sigscanlig_WriteFasta(hitsf, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); if((!sigscanlig_SignatureAlignWriteBlock(alignf, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); /* if((!sigscanlig_SignatureAlignWriteBlock(alignf, siglist, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); */ /* Sort list of hits by ligand type and site number. Process list of ligands and print out. */ ajListSortTwo(hits, embMatchLigid, embMatchSN); if(modei==1) ligands = sigscanlig_score_ligands_patch(hits); else if(modei==2) ligands = sigscanlig_score_ligands_site(hits); else ajFatal("Unrecognised mode"); sigscanlig_WriteResults(ligands, resultsf); ajFileClose(&hitsf); ajFileClose(&alignf); ajFileClose(&resultsf); /* Memory management */ while(ajListPop(hits, (void **) &hit)) embHitDel(&hit); ajListFree(&hits); while(ajListPop(ligands, (void **) &lighit)) sigscanlig_LigHitDel(&lighit); ajListFree(&ligands); } /* MEMORY MANAGEMENT */ while(ajListPop(siglist, (void **) &sig)) embSignatureDel(&sig); ajListFree(&siglist); ajSeqallDel(&database); ajMatrixfDel(&sub); ajStrDel(&nterm); ajDiroutDel(&hitsdir); ajDiroutDel(&aligndir); ajDiroutDel(&resultsdir); ajStrDel(&mode); embExit(); return 0; }
/* acdrelations entry point.
**
** Loads the EDAM data file and the known-types file, then passes every ACD
** file named in "indir" to acdrelations_procacdfile, writing a file of the
** same base name into "outdir".  Failure to open an input or output ACD
** file is fatal.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char **] Command-line arguments
** @return [int] 0 (after ajExit)
*/
int main(ajint argc, char **argv)
{
    /* Variable declarations */
    AjPFile   inf_edam  = NULL;   /* EDAM data (input) file               */
    AjPFile   acdoutf   = NULL;   /* Current ACD (output) file            */

    AjPList   acdinlist = NULL;   /* List of ACD file names (input)       */
    AjPFile   acdinf    = NULL;   /* Current ACD (input) file             */
    AjPStr    acdname   = NULL;   /* Name of current acd file             */
    AjPDirout acdoutdir = NULL;   /* Directory for ACD files (output)     */

    AjPFile   inf_ktype = NULL;   /* knowntypes.standard file             */

    PEdam     edam      = NULL;   /* EDAM relations data                  */
    PKtype    ktype     = NULL;   /* Data from knowntype.standard         */


    /* Read data from acd. */
    embInitP("acdrelations", argc, argv, "MYEMBOSS");

    /* ACD data handling */
    inf_edam  = ajAcdGetDatafile("infileedam");
    inf_ktype = ajAcdGetInfile("infiletype");
    acdinlist = ajAcdGetDirlist("indir");
    acdoutdir = ajAcdGetOutdir("outdir");

    /* Read data files */
    edam  = ajEdamNew();
    ktype = ajKtypeNew();

    acdrelations_readdatfile(inf_edam, &edam);
    acdrelations_readtypefile(inf_ktype, &ktype);


    /* Main application loop.  Process each ACD file in turn. */
    while(ajListPop(acdinlist, (void **) &acdname))
    {
        if(!(acdinf = ajFileNewInNameS(acdname)))
            ajFatal("Cannot open input ACD file %S\n", acdname);

        /* The output file keeps the input's base name. */
        ajFilenameTrimPath(&acdname);

        if(!(acdoutf = ajFileNewOutNameDirS(acdname, acdoutdir)))
            ajFatal("Cannot open output ACD file %S\n", acdname);

        acdrelations_procacdfile(acdinf, acdoutf, edam, ktype);

        ajFileClose(&acdinf);
        ajFileClose(&acdoutf);

        /* The popped name is owned by this loop; release it so that it
           is not leaked once per input file. */
        ajStrDel(&acdname);
    }

    /* Clean up and exit.
       NOTE(review): ktype is allocated above but never released here -
       confirm whether a matching destructor should be called. */
    ajFileClose(&inf_edam);
    ajFileClose(&inf_ktype);
    ajListFree(&acdinlist);
    ajDiroutDel(&acdoutdir);

    ajEdamDel(&edam);

    ajExit();

    return 0;
}
/* pdbplus entry point.
**
** For every clean coordinate file (CCF) named in "ccfinpath" the
** corresponding PDB file is located in "pdbindir".  Depending on the ACD
** "mode" value, STRIDE (unless the mode starts with '2') and/or NACCESS
** (unless it starts with '1') is run on it through the shell.  The
** per-residue values parsed from the program output are stored in the Pdb
** object, which is then written to "ccfoutdir".  Failures for a single
** file are reported with ajWarn, logged to the "logfile" output and
** skipped.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char **] Command-line arguments
** @return [int] 0 (after ajExit)
*/
int main(ajint argc, char **argv)
{
    AjPList    ccfin        = NULL;  /* List of CCF (input) files.        */
    AjPDir     pdbin        = NULL;  /* Path of pdb input files.          */
    AjPStr     pdbprefix    = NULL;  /* Prefix of pdb input files.        */
    AjPStr     pdb_name     = NULL;  /* Full name (path/name/extension)
                                        of pdb format input file.         */
    AjPDirout  ccfout       = NULL;  /* Path of coordinate output file.   */
    AjPStr     randomname   = NULL;  /* Name for temp file tempf.         */
    AjPStr     ccf_this     = NULL;  /* Name of current CCF input file.   */
    AjPStr     exec         = NULL;  /* "rm" command line.                */
    AjPStr     naccess_str  = NULL;  /* Name of a NACCESS output file.    */
    AjPStr     line         = NULL;  /* Current line of program output.   */
    AjPStr     syscmd       = NULL;  /* Command line arguments.           */
    AjPStr    *mode         = NULL;  /* Mode of operation from acd.       */

    AjPFile    errf         = NULL;  /* pdbplus error file pointer.       */
    AjPFile    serrf        = NULL;  /* stride error file pointer.        */
    AjPFile    nerrf        = NULL;  /* naccess error file pointer.       */
    AjPFile    tempf        = NULL;  /* Temp file holding STRIDE or
                                        NACCESS output.                   */
    AjPFile    ccf_inf      = NULL;  /* Protein coordinate input file.    */
    AjPFile    ccf_outf     = NULL;  /* Protein coordinate output file.   */

    AjIList    iter         = NULL;  /* Iterator over a chain's residues. */

    AjBool     done_naccess = ajFalse; /* NACCESS data was stored.        */
    AjBool     done_stride  = ajFalse; /* STRIDE data was stored.         */
    AjBool     found        = ajFalse; /* Matching residue was seen.      */
    AjPResidue temp_res     = NULL;  /* Pointer to Residue object.        */

    AjPPdb     pdb_old      = NULL;  /* Pointer to PDB object - without
                                        new stride elements.              */
    AjPPdb     pdb          = NULL;  /* Pointer to PDB object.            */

    ajint      idn          = 0;     /* Chain identifier as a number
                                        (1,2,...)                         */
    ajint      chain_num    = 0;     /* Chain identifier index (0,1,...). */
    ajint      tS           = 0;     /* User-defined threshold size for
                                        SSEs.                             */
    ajint      nostride     = 0;     /* No. times stride failed           */
    ajint      nonaccess    = 0;     /* No. times naccess failed          */
    ajint      nofile       = 0;     /* No. times of file error           */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr     res          = NULL;  /* Residue id from STRIDE ASG line
                                        (ALA etc).                        */
    AjPStr     res_num      = NULL;  /* PDB residue number from STRIDE
                                        ASG line.                         */
    char       pcid         = ' ';   /* Protein chain identifier from
                                        STRIDE or NACCESS output
                                        (A,B, etc).                       */
    char       ss           = ' ';   /* One-letter secondary structure
                                        code from STRIDE ASG line.        */
    float      ph           = 0.0;   /* Phi angle from STRIDE ASG line.   */
    float      ps           = 0.0;   /* Psi angle from STRIDE ASG line.   */
    float      sa           = 0.0;   /* Residue solvent accessible area
                                        from STRIDE ASG line.             */

    /* The ten numeric columns of a NACCESS "RES" line, in file order. */
    float      f1  = 0;
    float      f2  = 0;
    float      f3  = 0;
    float      f4  = 0;
    float      f5  = 0;
    float      f6  = 0;
    float      f7  = 0;
    float      f8  = 0;
    float      f9  = 0;
    float      f10 = 0;


    /* Allocate strings; this section is used for variables that are
       allocated once only. */
    pdb_name    = ajStrNew();
    res         = ajStrNew();
    res_num     = ajStrNew();
    randomname  = ajStrNew();
    syscmd      = ajStrNew();
    line        = ajStrNew();
    naccess_str = ajStrNew();
    exec        = ajStrNew();


    /* Read data from acd. */
    embInitPV("pdbplus", argc, argv, "STRUCTURE", VERSION);

    ccfin     = ajAcdGetDirlist("ccfinpath");
    pdbin     = ajAcdGetDirectory("pdbindir");
    pdbprefix = ajAcdGetString("pdbprefix");
    ccfout    = ajAcdGetOutdir("ccfoutdir");
    mode      = ajAcdGetList("mode");
    errf      = ajAcdGetOutfile("logfile");

    if(ajStrGetCharFirst(*mode) != '2')
        serrf = ajAcdGetOutfile("slogfile");

    if(ajStrGetCharFirst(*mode) != '1')
        nerrf = ajAcdGetOutfile("nlogfile");

    tS = ajAcdGetInt("thresholdsize");

    ajRandomSeed();
    ajFilenameSetTempname(&randomname);


    /*
    ** Start of main application loop.
    ** Process each PDB/ protein coordinate file (EMBL format) in turn.
    */
    while(ajListPop(ccfin, (void **) &ccf_this))
    {
        /* Open protein coordinate file.  If it cannot be opened, write a
           message to the error file, delete ccf_this and continue. */
        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)
        {
            ajWarn("%s%S\n//\n",
                   "clean coordinate file not found: ", ccf_this);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "clean coordinate file not found: ", ccf_this);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFmtPrint("Processing %S\n", ccf_this);
        fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into
           AjPPdb object.  ajPdbReadAllModelsNew will create the AjPPdb
           object. */
        if(!(pdb_old = ajPdbReadAllModelsNew(ccf_inf)))
        {
            ajWarn("ERROR Clean coordinate file read error: %S\n//\n",
                   ccf_this);
            ajFmtPrintF(errf,
                        "ERROR Clean coordinate file read error: %S\n//\n",
                        ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old);
        ajPdbDel(&pdb_old);

        /* Construct name of corresponding PDB file.
           NACCESS does *not* generate an output file if the path is './'
           e.g. naccess ./1rbp.ent , therefore replace './' with null. */
        ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));

        if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
            ajStrAssignC(&pdb_name, "");

        ajStrAppendS(&pdb_name, pdbprefix);
        ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
        ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));

        /* Check corresponding PDB file exists for reading. */
        if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ",
                        pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this);
            ajPdbDel(&pdb);
            nofile++;
            continue;
        }


        if(ajStrGetCharFirst(*mode) != '2')
        {
            /*
            ** Create a string containing the STRIDE command line (it
            ** needs PDB file name & name of temp output file) and run it
            ** through the shell.
            ** NOTE(review): the file names are interpolated unquoted
            ** into a shell command and the exit status is ignored.
            */
            ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",
                        ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
            ajFmtPrint("%S %S -f%S >> %s 2>&1\n",
                       ajAcdGetpathC("stride"),
                       pdb_name, randomname, ajFileGetNameC(serrf));
            system(ajStrGetPtr(syscmd));

            /* Open the stride output file */
            if((tempf = ajFileNewInNameS(randomname)) == NULL)
            {
                ajWarn("%s%S\n//\n", "no stride output for: ", pdb_name);
                ajFmtPrintF(errf, "%s%S\n//\n", "no stride output for: ",
                            pdb_name);
                nostride++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n", "stride output for: ",
                            pdb_name);

            done_stride = ajFalse;

            /* Parse STRIDE output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf, &line))
            {
                if(ajStrPrefixC(line, "ASG"))
                {
                    ajFmtScanS(line,
                               "%*S %S %c %S %*d %c %*S %f %f %f %*S",
                               &res, &pcid, &res_num, &ss, &ph, &ps, &sa);

                    /*
                    ** Populate pdbplus object with the data from this
                    ** parsed line.  This means first identifying the
                    ** chain, then finding the residue.
                    */

                    /* The chain lookup does not recognise '-', so change
                       '-' to '.' */
                    if(pcid == '-')
                        pcid = '.';

                    /* Get chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf,
                                    "Could not convert chain id %c "
                                    "to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** The chain number that will get written starts at
                    ** 1, but we want an index into an array which must
                    ** start at 0, so subtract 1 from the chain number to
                    ** get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; found switches to true when the first
                    ** residue corresponding to the line is found.
                    */
                    iter = ajListIterNewread(
                               pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue) ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want */
                        if(ajStrMatchS(res_num, temp_res->Pdb) &&
                           ajStrMatchS(res, temp_res->Id3))
                        {
                            done_stride = ajTrue;
                            found       = ajTrue;
                            temp_res->eStrideType = ss;
                            temp_res->Phi  = ph;
                            temp_res->Psi  = ps;
                            temp_res->Area = sa;
                        }
                        /* If the matching residue has been processed
                           move on to next ASG line, next residue. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residue not found yet. */
                            continue;
                    }

                    ajListIterDel(&iter);
                } /* End of if ASG loop. */
            } /* End of while line loop. */

            if(done_stride)
                ajFmtPrintF(errf, "%s%S\n//\n", "stride data for: ",
                            pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n", "no stride data for: ",
                            pdb_name);
                ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
                nostride++;
            }

            /* Close STRIDE temp file. & tidy up. */
            ajFileClose(&tempf);

            /* Remove temporary file (stride output file). */
            ajFmtPrintS(&exec, "rm %S", randomname);
            ajSysSystem(exec);

            /*
            ** Calculate element serial numbers (eStrideNum) & amend
            ** residue objects, count no's of elements and amend chain
            ** object (numHelices, num Strands).
            */
            pdbplus_sort(pdb, tS);
        }


        if(ajStrGetCharFirst(*mode) != '1')
        {
            /*
            ** Create a string containing the NACCESS command line (it
            ** needs PDB file name & name of temp output file) & call
            ** NACCESS.
            ** If e.g. /data/structure/pdbfred.ent was parsed and the
            ** program was run from /stuff, then /stuff/fred.asa and
            ** /stuff/fred.rsa would be written.  These must be deleted
            ** once parsed (only use the .rsa file here).
            */
            ajFmtPrintS(&syscmd, "%S %S >> %s 2>&1",
                        ajAcdGetpathC("naccess"), pdb_name,
                        ajFileGetNameC(nerrf));
            ajFmtPrint("%S %S >> %s 2>&1\n",
                       ajAcdGetpathC("naccess"), pdb_name,
                       ajFileGetNameC(nerrf));
            system(ajStrGetPtr(syscmd));

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".rsa");

            /* Open the NACCESS output file. */
            if((tempf = ajFileNewInNameS(naccess_str)) == NULL)
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess output for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
                nonaccess++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n", "naccess output for: ",
                            pdb_name);

            done_naccess = ajFalse;

            /* Parse NACCESS output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf, &line))
            {
                if(ajStrPrefixC(line, "RES"))
                {
                    /* Read data from lines.  Column 8 holds the chain
                       identifier; when it is blank there is no chain
                       field to skip in the scan format. */
                    if((pcid = line->Ptr[8]) == ' ')
                        ajFmtScanS(line, "%*S %S %S %f %f %f "
                                   "%f %f %f %f %f %f %f",
                                   &res, &res_num, &f1, &f2, &f3, &f4,
                                   &f5, &f6, &f7, &f8, &f9, &f10);
                    else
                        ajFmtScanS(line, "%*S %S %*c %S %f %f "
                                   "%f %f %f %f %f %f %f %f",
                                   &res, &res_num, &f1, &f2, &f3, &f4,
                                   &f5, &f6, &f7, &f8, &f9, &f10);

                    /* Identify the chain, then find all the residues
                       corresponding to the line. */

                    /* Get the chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id"
                                    " %c to chain number in pdb file "
                                    "%S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** Chain number will start at 1, but we want an index
                    ** into an array which must start at 0, so subtract 1
                    ** from the chain number to get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; temp_res points to the current residue and
                    ** found switches to true when the first residue
                    ** corresponding to the line is found.
                    */
                    iter = ajListIterNewread(
                               pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue) ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want, write
                           the residue object. */
                        if(ajStrMatchS(res_num, temp_res->Pdb) &&
                           ajStrMatchS(res, temp_res->Id3))
                        {
                            found        = ajTrue;
                            done_naccess = ajTrue;
                            temp_res->all_abs  = f1;
                            temp_res->all_rel  = f2;
                            temp_res->side_abs = f3;
                            temp_res->side_rel = f4;
                            temp_res->main_abs = f5;
                            temp_res->main_rel = f6;
                            temp_res->npol_abs = f7;
                            temp_res->npol_rel = f8;
                            temp_res->pol_abs  = f9;
                            temp_res->pol_rel  = f10;
                        }
                        /* If the matching residues have all been
                           processed move on to next RES line. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residues not found yet, move on
                               to next residue. */
                            continue;
                    }

                    ajListIterDel(&iter);
                }
            }

            if(done_naccess)
                ajFmtPrintF(errf, "%s%S\n//\n", "naccess data for: ",
                            pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n", "no naccess data for: ",
                            pdb_name);
                ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
                nonaccess++;
            }

            /* Remove temporary files (naccess output files). */
            ajFileClose(&tempf);

            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".asa");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".log");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);
        }


        /* Open CCF (output) file.  A failed open is reported and the
           entry skipped rather than handing a NULL file to the writer. */
        if(!(ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout)))
        {
            ajWarn("%s%S\n//\n", "Could not open results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "Could not open results file for: ", pdb->Pdb);
            nofile++;
            ajPdbDel(&pdb);
            ajStrDel(&ccf_this);
            continue;
        }

        /* Write AjPPdb object to the output file in clean format. */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("%s%S\n//\n", "Could not write results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "Could not write results file for ", pdb->Pdb);
        }

        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */


    ajFmtPrint("STRIDE  failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE  failures: %d\nNACCESS failures: %d\n",
                nostride, nonaccess);

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);

    ajFileClose(&errf);

    if(ajStrGetCharFirst(*mode) != '2')
        ajFileClose(&serrf);

    if(ajStrGetCharFirst(*mode) != '1')
        ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);

    ajExit();

    return 0;
}
int main(int argc, char **argv) { const char *codons[]= { "TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */ "TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */ "GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */ "AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */ "ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */ "CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */ "TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */ "ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC" /* 56-63 */ }; const char *aa= "***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY"; AjPFile inf = NULL; AjPFile outf = NULL; char *entryname = NULL; AjPStr fname = NULL; AjPStr key = NULL; AjPStr tmpkey = NULL; AjBool allrecords = AJFALSE; AjPTable table = NULL; ajint i = 0; ajint j = 0; ajint k = 0; ajint x = 0; ajint savecount[3]; AjPStr *keyarray = NULL; CutgPValues *valarray = NULL; AjPCod codon = NULL; ajint sum = 0; char c; AjPList flist = NULL; AjPFile logf = NULL; AjPStr entry = NULL; AjPStr baseentry = NULL; AjPStr wild = NULL; AjPStr division = NULL; AjPStr release = NULL; AjPStr wildspecies = NULL; CutgPValues value = NULL; AjPStr docstr = NULL; AjPStr species = NULL; AjPStr filename = NULL; ajint nstops; embInit("cutgextract",argc,argv); tmpkey = ajStrNew(); fname = ajStrNew(); table = ajTablestrNewLen(TABLE_ESTIMATE); flist = ajAcdGetDirlist("directory"); wild = ajAcdGetString("wildspec"); release = ajAcdGetString("release"); logf = ajAcdGetOutfile("outfile"); wildspecies = ajAcdGetString("species"); filename = ajAcdGetString("filename"); allrecords = ajAcdGetBoolean("allrecords"); ajStrInsertC(&release, 0, "CUTG"); ajStrRemoveWhite(&release); while(ajListPop(flist,(void **)&entry)) { ajStrAssignS(&baseentry, entry); ajFilenameTrimPath(&baseentry); ajDebug("Testing file '%S'\n", entry); if(!ajStrMatchWildS(baseentry,wild)) { ajStrDel(&entry); continue; } ajDebug("... 
matched wildcard '%S'\n", wild); inf = ajFileNewInNameS(entry); if(!inf) ajFatal("cannot open file %S",entry); ajFmtPrintS(&division, "%F", inf); ajFilenameTrimAll(&division); while((entryname = cutgextract_next(inf, wildspecies, &species, &docstr))) { if(ajStrGetLen(filename)) ajStrAssignS(&tmpkey,filename); else ajStrAssignC(&tmpkey,entryname); /* See if organism is already in the table */ value = ajTableFetch(table,tmpkey); if(!value) /* Initialise */ { key = ajStrNewS(tmpkey); AJNEW0(value); ajStrAssignS(&value->Species,species); ajStrAssignS(&value->Division, division); ajTablePut(table,(void *)key,(void *)value); } for(k=0;k<3;k++) savecount[k] = value->Count[k]; nstops = cutgextract_readcodons(inf,allrecords, value->Count); if(nstops < 1) { value->Skip++; continue; } value->CdsCount++; if(nstops>1) { value->CdsCount += (nstops - 1); value->Warn++; ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'", nstops, value->Count[0] - savecount[0], value->Count[1] - savecount[1], value->Count[2] - savecount[2], cutgextractSavepid); } } ajStrDel(&entry); ajFileClose(&inf); } ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray); i = 0; while(keyarray[i]) { key = keyarray[i]; value = (CutgPValues) valarray[i++]; codon = ajCodNew(); sum = 0; for(j=0;j<CODONS;++j) { sum += value->Count[j]; x = ajCodIndexC(codons[j]); codon->num[x] = value->Count[j]; c = aa[j]; if(c=='*') codon->aa[x] = 27; else codon->aa[x] = c-'A'; } ajCodCalcUsage(codon,sum); ajStrAppendC(&key, ".cut"); if(allrecords) { if(value->Warn) ajFmtPrintF(logf, "Writing %S CDS: %d Warnings: %d\n", key, value->CdsCount, value->Warn); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } else { if(value->Skip) ajFmtPrintF(logf, "Writing %S CDS: %d Skipped: %d\n", key, value->CdsCount, value->Skip); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } ajFmtPrintS(&fname,"CODONS/%S",key); outf = ajDatafileNewOutNameS(fname); if(!outf) ajFatal("Cannot open 
output file %S",fname); ajCodSetNameS(codon, key); ajCodSetSpeciesS(codon, value->Species); ajCodSetDivisionS(codon, value->Division); ajCodSetReleaseS(codon, release); ajCodSetNumcds(codon, value->CdsCount); ajCodSetNumcodons(codon, sum); ajCodWrite(codon, outf); ajFileClose(&outf); ajStrDel(&key); ajStrDel(&value->Division); ajStrDel(&value->Doc); ajStrDel(&value->Species); AJFREE(value); ajCodDel(&codon); } AJFREE(keyarray); AJFREE(valarray); ajTableFree(&table); ajListFree(&flist); ajStrDel(&wild); ajStrDel(&release); ajStrDel(&wildspecies); ajStrDel(&filename); ajFileClose(&logf); ajStrDel(&cutgextractSavepid); ajStrDel(&cutgextractLine); ajStrDel(&cutgextractOrg); ajStrDel(&fname); ajStrDel(&tmpkey); ajStrDel(&species); ajStrDel(&docstr); ajStrDel(&division); ajStrDel(&baseentry); embExit(); return 0; }
/* @prog domainreso ***********************************************************
**
** Removes low resolution domains from a DCF file (domain
** classification file).
**
** Every clean coordinate file named in "cpdbpath" is read and the PDB code
** of each structure whose resolution value is ABOVE the user-defined
** threshold is collected.  The DCF input file is then read one domain at a
** time; a domain is written to the DCF output file only when it is NOT
** found in the collected list.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char **] Command-line arguments
** @return [int] 0 (after ajExit)
**
******************************************************************************/
int main(ajint argc, char **argv)
{
    AjPList    cpdb_path = NULL;   /* Coordinate (input) file names       */
    AjPStr     temp      = NULL;   /* Current coordinate file name        */
    AjPStr     temp2     = NULL;   /* Copy of a pdb code for 'entry'      */
    AjPList    entry     = NULL;   /* List of pdb codes with resolution
                                      ABOVE the threshold                 */
    AjPStr    *entryarr  = NULL;   /* entry as an array                   */

    AjPFile    fptr_cpdb = NULL;   /* Pointer to current coordinate file  */
    AjPFile    dcfin     = NULL;   /* DCF input file                      */
    AjPFile    dcfout    = NULL;   /* DCF output file                     */

    AjPPdb     pdb       = NULL;   /* Pdb object pointer                  */
    AjPDomain  domain    = NULL;   /* Domain structure                    */

    float      threshold = 0.0;    /* Resolution threshold                */
    ajint      num       = 0;      /* Number of codes in entry            */
    ajint      i         = 0;      /* Index into entryarr                 */
    ajint      type      = 0;      /* Type of domain (ajSCOP or ajCATH)
                                      in the DCF file                     */


    /* Read data from acd */
    embInitPV("domainreso", argc, argv, "DOMAINATRIX", VERSION);

    cpdb_path = ajAcdGetDirlist("cpdbpath");
    threshold = ajAcdGetFloat("threshold");
    dcfin     = ajAcdGetInfile("dcfinfile");
    dcfout    = ajAcdGetOutfile("dcfoutfile");

    /* 'temp' is not pre-allocated: ajListPop overwrites the pointer, so
       a fresh string would be leaked. */
    entry = ajListNew();

    type = ajDomainDCFType(dcfin);


    /* Produce list of pdb codes with resolution ABOVE the threshold. */
    while(ajListPop(cpdb_path, (void **) &temp))
    {
        /* Open coordinate file. */
        if((fptr_cpdb = ajFileNewInNameS(temp)) == NULL)
        {
            ajWarn("Could not open cpdb file");
            ajStrDel(&temp);
            continue;
        }

        ajFmtPrint("%S\n", temp);
        fflush(stdout);

        /* Read coordinate data file.  A file that cannot be parsed is
           skipped instead of dereferencing a NULL Pdb object. */
        if(!(pdb = ajPdbReadFirstModelNew(fptr_cpdb)))
        {
            ajWarn("Could not read cpdb file %S", temp);
            ajFileClose(&fptr_cpdb);
            ajStrDel(&temp);
            continue;
        }

        /* Check if resolution is above threshold. */
        if(pdb->Reso > threshold)
        {
            /* Assign ID to list; the list takes ownership of the copy. */
            temp2 = ajStrNew();
            ajStrAssignS(&temp2, pdb->Pdb);
            ajListPush(entry, (void *) temp2);
        }

        /* Close coordinate file and tidy up */
        ajPdbDel(&pdb);
        ajFileClose(&fptr_cpdb);
        ajStrDel(&temp);
    }

    num = ajListGetLength(entry);


    /* Sort the list of pdb codes & convert to an array. */
    ajListSort(entry, domainreso_StrComp);
    ajListToarray(entry, (void ***) &entryarr);


    /* Read DCF file and compare IDs to those in list; if not present
       then write domain structure data to output. */
    while((domain = ajDomainReadCNew(dcfin, "*", type)))
    {
        /* DOMAIN id not found in the list of domains with resolution
           above the threshold, so include it in the output file. */
        if(domainreso_StrBinSearchDomain(ajDomainGetId(domain),
                                         entryarr, num) == -1)
            ajDomainWrite(dcfout, domain);

        /* Delete domain structure. */
        ajDomainDel(&domain);
    }


    /* Tidy up.  The array and the list reference the same strings, so
       each one is released exactly once through the array; the list and
       the array themselves are freed afterwards. */
    for(i = 0; i < num; i++)
        ajStrDel(&entryarr[i]);

    ajFileClose(&dcfout);
    ajFileClose(&dcfin);
    ajListFree(&cpdb_path);
    ajListFree(&entry);
    AJFREE(entryarr);

    /* Return. */
    ajExit();
    return 0;
}
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. 
*/ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. 
*/ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. 
*/ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }