int main(int argc, char** argv) { CommandlineParser parpars("PoseIndices2PDB", "converts pose indices into PDB files ", VERSION, String(__DATE__), "Convert, combine and store"); parpars.registerMandatoryInputFile("i_clust", "input cluster index file"); parpars.registerMandatoryInputFile("i_trans", "input tranformation file"); parpars.registerMandatoryInputFile("i_pdb", "input reference pdb file"); parpars.registerMandatoryOutputFile("o", "output file name prefix for resulting pdb files"); parpars.setParameterAsHidden("o"); // parameters for galaxy for handling multiple output files parpars.registerOptionalGalaxyOutputId("o_id", "output file name prefix for 2nd to last pdb file", "$o.id"); // need to be hidden in command line mode parpars.setParameterAsHidden("o_id"); parpars.setParameterAsAdvanced("o_id"); // parameters for galaxy for handling multiple output files parpars.registerOptionalGalaxyOutputFolder("o_dir", "output directory for 2nd to last pdb file", "$__new_file_path__"); // need to be hidden in command line mode parpars.setParameterAsHidden("o_dir"); parpars.setParameterAsAdvanced("o_dir"); // the manual String man = "This tool converts all pose indices from a given transformation file and the corresponding reference PDBFile into separate PDBFiles.\n\nParameters are the input pose index file (-i_clust), the original transformation file (-i_trans), the corresponding reference pdb file (-i_pdb) and a naming schema for the resulting pdb files (-o). \n\nOutput of this tool is a set of PDBFiles representing the docking poses belonging to the given input cluster."; parpars.setToolManual(man); // here we set the types of I/O files parpars.setSupportedFormats("i_clust","txt"); parpars.setSupportedFormats("i_trans","dcd"); parpars.setSupportedFormats("i_pdb","pdb"); parpars.setSupportedFormats("o","pdb"); parpars.parse(argc, argv); ////////////////////////////////////////////////// // read the input PDBFile pdb; pdb.open(parpars.get("i_pdb")); System sys; pdb.read(sys); PoseClustering pc; if (parpars.has("i_trans")) { pc.options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::RIGID_RMSD); pc.setBaseSystemAndTransformations(sys, parpars.get("i_trans")); } //std::vector< std::set<Index> > clusters; LineBasedFile file(parpars.get("i_clust"), std::ios::in); vector<String> fields; String cluster_id = -1; String pose_id = -1; // called as command line or e.g. via galaxy? bool is_cmd = !parpars.has("env") || ((parpars.has("env") && parpars.get("env")=="cmdline")); bool first_sol = true; while (file.LineBasedFile::readLine()) { // get the line String current_cluster = file.getLine(); if (current_cluster.getField(1) == "cluster") { cluster_id = current_cluster.getField(2); pose_id = -1; if (file.LineBasedFile::readLine()) { current_cluster = file.getLine(); fields.clear(); current_cluster.split(fields); for (Size i=0; i < fields.size(); i++) { System new_pose_sys(sys); pose_id = fields[i]; pc.applyTransformation2System(pose_id.toInt(), new_pose_sys); // create the output name String outfile_name = String(parpars.get("o")) + "_clust_" + cluster_id + "_pose_" + String(pose_id) + ".pdb"; if (parpars.has("o_dir") && is_cmd && (parpars.get("o_dir") != "$__new_file_path__")) { outfile_name = String(parpars.get("o_dir")) + "/" + outfile_name; } // NOTE: Galaxy requires this strange naming convention // including the fact, that zero-th element has a different name if (!is_cmd) { outfile_name = (first_sol) ? String(parpars.get("o")) : String(parpars.get("o_dir")) + "/primary_" + String(parpars.get("o_id")) + "_clust_" + cluster_id + "_pose_" + String(pose_id) + "_visible_pdb"; } PDBFile file(outfile_name, ios::out); if (file.bad()) { Log.error() << "cannot write file " << outfile_name << endl; return 2; } file << new_pose_sys; file.close(); // needed for galaxy output if (first_sol) first_sol = false; Log << "wrote file " << outfile_name << endl; } } } } Log << "done." << endl; return 0; }
int main (int argc, char **argv) { // instantiate CommandlineParser object supplying // - tool name // - short description // - version string // - build date // - category CommandlineParser parpars("DockPoseClustering", "clusters docking poses ", VERSION, String(__DATE__), "Docking"); // we register an input file parameter // - CLI switch // - description // - Inputfile parpars.registerMandatoryInputFile("i_pdb", "input pdb-file"); parpars.registerOptionalInputFile("i_dcd", "input dcd-file"); ///TODO: offer the alternatives in a more elegant way! parpars.registerOptionalInputFile("i_trans", "or input transformation file for rigid rmsd clustering "); // we register an output file parameter // - CLI switch // - description // - parameter type // - required // - default value // - hidden in galaxy parpars.registerMandatoryOutputFile("o_index_list", "output file name for the index list "); parpars.setParameterAsHidden("o_index_list"); parpars.registerOptionalOutputFile("o_score_matrix", "output file name for scoring matrix "); parpars.setParameterAsHidden("o_score_matrix"); parpars.registerOptionalOutputFile("o_dcd", "output file name for the first cluster dcd file "); parpars.setParameterAsHidden("o_dcd"); parpars.registerOptionalGalaxyOutputId("o_dcd_id", "output id ", "$o_dcd.id"); // need to be hidden in command line mode parpars.setParameterAsAdvanced("o_dcd_id"); parpars.setParameterAsHidden("o_dcd_id"); parpars.registerOptionalGalaxyOutputFolder("o_dcd_dir", "output directory for 2nd to last cluster dcd file (if needed) ", "$__new_file_path__"); // need to be hidden in command line mode parpars.setParameterAsAdvanced("o_dcd_dir"); parpars.setParameterAsHidden("o_dcd_dir"); // register String parameter for supplying minimal rmsd between clusters parpars.registerOptionalDoubleParameter("rmsd_cutoff", "minimal rmsd between the final clusters (default 5.0) ", 5.0); parpars.setParameterRestrictions("rmsd_cutoff", 0, 100); // choice of cluster algorithm parpars.registerOptionalStringParameter("alg", "algorithm used for clustering (CLINK_DEFAYS, CLINK_ALTHAUS, NEAREST_NEIGHBOR_CHAIN_WARD, SLINK_SIBSON, TRIVIAL_COMPLETE_LINKAGE) ", "CLINK_DEFAYS"); list<String> cluster_algs; cluster_algs.push_back("CLINK_DEFAYS"); cluster_algs.push_back("CLINK_ALTHAUS"); cluster_algs.push_back("TRIVIAL_COMPLETE_LINKAGE"); cluster_algs.push_back("NEAREST_NEIGHBOR_CHAIN_WARD"); cluster_algs.push_back("SLINK_SIBSON"); parpars.setParameterRestrictions("alg", cluster_algs); // choice of atom rmsd scope parpars.registerOptionalStringParameter("scope", "atoms to be considered for scoreing a pose (C_ALPHA, BACKBONE, ALL_ATOMS) ", "C_ALPHA"); list<String> rmsd_levels; rmsd_levels.push_back("C_ALPHA"); //rmsd_levels.push_back("HEAVY_ATOMS"); //TODO rmsd_levels.push_back("BACKBONE"); rmsd_levels.push_back("ALL_ATOMS"); parpars.setParameterRestrictions("scope", rmsd_levels); // choice of rmsd type parpars.registerOptionalStringParameter("rmsd_type", "rmsd type used for clustering (SNAPSHOT_RMSD, RIGID_RMSD, CENTER_OF_MASS_DISTANCE) ", "SNAPSHOT_RMSD"); list<String> rmsd_types; rmsd_types.push_back("SNAPSHOT_RMSD"); rmsd_types.push_back("RIGID_RMSD"); rmsd_types.push_back("CENTER_OF_MASS_DISTANCE"); parpars.setParameterRestrictions("rmsd_type", rmsd_types); // further optional output parameters parpars.registerOptionalOutputFile("o_red_dcd", "output file for the reduced cluster set (dcd with one structure per final cluster) "); // write the final cluster tree in boost::serialize format, if it was computed parpars.registerOptionalOutputFile("o_cluster_tree", "output file containing the cluster tree in boost::serialize format (if the tree was computed) "); // register bool parameter for using pre-clustering parpars.registerFlag("use_refinement", "Apply a second clustering run with different options (-refine_alg <string>, -refine_rmsd_type <string>, and -refine_rmsd_scope <string>)", false, true); // refinement algorithm parpars.registerOptionalStringParameter("refine_alg", "algorithm used for second clustering run (CLINK_DEFAYS, NEAREST_NEIGHBOR_CHAIN_WARD, SLINK_SIBSON, TRIVIAL_COMPLETE_LINKAGE) ", "CLINK_DEFAYS"); parpars.setParameterAsHidden("refine_alg"); parpars.setParameterRestrictions("refine_alg", cluster_algs); // refinement rmsd type parpars.registerOptionalStringParameter("refine_rmsd_type", "rmsd type used for second clustering run (SNAPSHOT_RMSD, RIGID_RMSD, CENTER_OF_MASS_DISTANCE) ", "SNAPSHOT_RMSD"); parpars.setParameterAsHidden("refine_rmsd_type"); parpars.setParameterRestrictions("refine_rmsd_type", rmsd_types); // refinement rmsd scope parpars.registerOptionalStringParameter("refine_rmsd_scope", "atoms to be considered for rmsd score in second clustering run (C_ALPHA, BACKBONE, ALL_ATOMS) ", "C_ALPHA"); parpars.setParameterAsHidden("refine_rmsd_scope"); parpars.setParameterRestrictions("refine_rmsd_scope", rmsd_levels); // force serial execution, even if the algorithm supports parallel runs parpars.registerFlag("run_serial", "force serial excecution, even if parallel execution would be supported by the algorithm", false, true); // the manual String man = "This tool computes clusters of docking poses given as conformation set or a list of rigid transformations.\n\nParameters are either the input ConformationSet (-i_dcd) and one corresponding pdb file (-i_pdb), or a transformation file (-i_trans). Output can be a cluster index list (-o_index_list), a cluster scoring matrix (-o_score_matrix), or dcd files per cluster (-o_dcd). Optional parameters are the algorithm (-alg), the minimal rmsd between the final clusters (-rmsd_cutoff), the rmsd type (-rmsd_type), and the type of atoms used for scoring a pose (-scope). The optional parameter -o_red_dcd sets the output file for the reduced cluster set (one representative per cluster). The optional parameter -o_cluster_tree specifies the output file for storing the cluster tree.\n\nOutput of this tool depends in the choice of the output parameters."; parpars.setToolManual(man); // here we set the types of I/O files parpars.setSupportedFormats("i_dcd","dcd"); parpars.setSupportedFormats("i_pdb","pdb"); parpars.setSupportedFormats("i_trans","txt"); parpars.setSupportedFormats("o_index_list","txt"); parpars.setSupportedFormats("o_score_matrix","txt"); parpars.setSupportedFormats("o_dcd","dcd"); parpars.setSupportedFormats("o_red_dcd","dcd"); parpars.setSupportedFormats("o_cluster_tree","dat"); parpars.parse(argc, argv); ////////////////////////////////////////////////// if (parpars.has("o_dcd")) { if (!parpars.has("o_dcd_dir") || !parpars.has("o_dcd_id")) { Log << "Output type \"dcd\" requires setting the options \"o_dir\" \"o_id\"! Abort!" << endl; return 1; } } if ( parpars.has("o_cluster_tree") && (!parpars.has("alg") || parpars.get("alg") != "NEAREST_NEIGHBOR_CHAIN_WARD")) { Log << "Output of cluster tree requires Ward algorithm! Abort!" << endl; return 1; } // read the input PDBFile pdb; pdb.open(parpars.get("i_pdb")); System sys; pdb.read(sys); ConformationSet cs; cs.setup(sys); if (parpars.has("i_dcd")) { cs.readDCDFile(parpars.get("i_dcd")); } cs.resetScoring(); PoseClustering pc; if (parpars.has("i_trans")) { pc.setBaseSystemAndTransformations(sys, parpars.get("i_trans")); } if (parpars.has("rmsd_cutoff")) { float rmsd = parpars.get("rmsd_cutoff").toInt(); pc.options.setReal(PoseClustering::Option::DISTANCE_THRESHOLD, rmsd); } if (parpars.has("scope")) { String scope = parpars.get("scope"); if (scope == "C_ALPHA") pc.options.set(PoseClustering::Option::RMSD_LEVEL_OF_DETAIL, PoseClustering::C_ALPHA); else if (scope == "BACKBONE") pc.options.set(PoseClustering::Option::RMSD_LEVEL_OF_DETAIL, PoseClustering::BACKBONE); else if (scope == "ALL_ATOMS") pc.options.set(PoseClustering::Option::RMSD_LEVEL_OF_DETAIL, PoseClustering::ALL_ATOMS); else Log.info() << "Unknown value " << scope << " for option scope." << endl; } if (parpars.has("alg")) { String alg = parpars.get("alg"); if (alg == "CLINK_DEFAYS") pc.options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::CLINK_DEFAYS); else if (alg == "CLINK_ALTHAUS") pc.options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::CLINK_ALTHAUS); else if (alg == "SLINK_SIBSON") pc.options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::SLINK_SIBSON); else if (alg == "TRIVIAL_COMPLETE_LINKAGE") pc.options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::TRIVIAL_COMPLETE_LINKAGE); else if (alg == "NEAREST_NEIGHBOR_CHAIN_WARD") pc.options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::NEAREST_NEIGHBOR_CHAIN_WARD); else Log.info() << "Unknown value " << alg << " for option alg." << endl; } if (parpars.has("rmsd_type")) { String type = parpars.get("rmsd_type"); if (type == "SNAPSHOT_RMSD") pc.options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::SNAPSHOT_RMSD); else if (type == "RIGID_RMSD") pc.options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::RIGID_RMSD); else if (type == "CENTER_OF_MASS_DISTANCE") { pc.options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::CENTER_OF_MASS_DISTANCE); Log << "Parameter scope will be ignored!" << endl; } else Log.info() << "Unknown value " << type << " for option rmsd_type." << endl; } if (parpars.has("run_serial")) { pc.options.set(PoseClustering::Option::RUN_PARALLEL, false); } else { pc.options.set(PoseClustering::Option::RUN_PARALLEL, true); } if (parpars.has("i_dcd")) { pc.setConformationSet(&cs); } pc.compute(); // do we need a second clustering run? if (parpars.has("use_refinement")) { // get the options Options refine_options = pc.options; if (parpars.has("refine_rmsd_scope")) { String scope = parpars.get("refine_rmsd_scope"); if (scope == "C_ALPHA") refine_options.set(PoseClustering::Option::RMSD_LEVEL_OF_DETAIL, PoseClustering::C_ALPHA); else if (scope == "BACKBONE") refine_options.set(PoseClustering::Option::RMSD_LEVEL_OF_DETAIL, PoseClustering::BACKBONE); else if (scope == "ALL_ATOMS") refine_options.set(PoseClustering::Option::RMSD_LEVEL_OF_DETAIL, PoseClustering::ALL_ATOMS); else Log.info() << "Unknown value " << scope << " for option refine_rmsd_scope." << endl; } if (parpars.has("refine_alg")) { String alg = parpars.get("refine_alg"); if (alg == "CLINK_DEFAYS") refine_options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::CLINK_DEFAYS); else if (alg == "CLINK_ALTHAUS") refine_options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::CLINK_ALTHAUS); else if (alg == "SLINK_SIBSON") refine_options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::SLINK_SIBSON); else if (alg == "TRIVIAL_COMPLETE_LINKAGE") refine_options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::TRIVIAL_COMPLETE_LINKAGE); else if (alg == "NEAREST_NEIGHBOR_CHAIN_WARD") refine_options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::NEAREST_NEIGHBOR_CHAIN_WARD); else Log.info() << "Unknown value " << alg << " for option refine_alg." << endl; } if (parpars.has("refine_rmsd_type")) { String type = parpars.get("refine_rmsd_type"); if (type == "SNAPSHOT_RMSD") refine_options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::SNAPSHOT_RMSD); else if (type == "RIGID_RMSD") refine_options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::RIGID_RMSD); else if (type == "CENTER_OF_MASS_DISTANCE") { refine_options.set(PoseClustering::Option::RMSD_TYPE, PoseClustering::CENTER_OF_MASS_DISTANCE); Log << "Parameter scope will be ignored!" << endl; } else Log.info() << "Unknown value " << type << " for option refine_rmsd_type." << endl; } pc.refineClustering(refine_options); } Size num_clusters = pc.getNumberOfClusters(); Log << "Computed " << num_clusters << " clusters, start writing..." << endl; if (parpars.has("o_dcd")) { for (Size i = 0; i < num_clusters; i++) { Log << " Cluster " << i << " has " << pc.getClusterSize(i) << " members." << endl; boost::shared_ptr<ConformationSet> new_cs = pc.getClusterConformationSet(i); String outfile_name = (i == 0) ? String(parpars.get("o_dcd")) : String(parpars.get("o_dcd_dir")) + "/primary_" + String(parpars.get("o_dcd_id")) + "_cluster" + String(i) + "_visible_dcd"; //Log << " Writing solution " << String(i) << " as " << outfile_name << endl; new_cs->writeDCDFile(outfile_name); } } if (parpars.has("o_index_list")) { String outfile_name = String(parpars.get("o_index_list")); File cluster_outfile(outfile_name, std::ios::out); pc.printClusters(cluster_outfile); } if (parpars.has("o_score_matrix")) { String outfile_name = String(parpars.get("o_score_matrix")); File cluster_outfile(outfile_name, std::ios::out); pc.printClusterScores(cluster_outfile); } // print pc.printClusters(); pc.printClusterScores(); if (parpars.has("o_cluster_tree")) { File cluster_out(parpars.get("o_cluster_tree"), std::ios::out); pc.serializeWardClusterTree(cluster_out, true); cluster_out.close(); } if (parpars.has("o_red_dcd")) { String outfile_name = String(parpars.get("o_red_dcd")); boost::shared_ptr<ConformationSet> cs = pc.getReducedConformationSet(); cs->writeDCDFile(outfile_name); } Log << "done." << endl; return 0; }