// assimilate_handler() is called by BOINC code and is passed the canonical
// result for a workunit. assimilate_handler() reads the referenced result
// file and inserts the result and its signals into the master science DB.
// BOINC also passes the workunit (as it appears in the BOINC DB) and a vector
// containing all results (including the canonical one) for that workunit.
// We use the workunit to determine if there is an error condition.
int assimilate_handler(
    WORKUNIT& boinc_wu, vector<RESULT>& boinc_results,
    RESULT& boinc_canonical_result
) {
    int retval = 0;
    int spike_count = 0, spike_inserted_count = 0,
        gaussian_count = 0, gaussian_inserted_count = 0,
        pulse_count = 0, pulse_inserted_count = 0,
        triplet_count = 0, triplet_inserted_count = 0;
    static receiver_config receiver_cfg;
    static analysis_config analysis_cfg;
    workunit s_wu;
    workunit_grp s_wu_grp;
    result sah_result;
    spike sah_spike;
    gaussian sah_gaussian;
    pulse sah_pulse;
    triplet sah_triplet;
    char filename[256];
    char* path;
    std::string path_str;
    long sah_result_id;
    sqlint8_t sah_spike_id, sah_gaussian_id, sah_pulse_id, sah_triplet_id;
    static bool first_time = true;
    int sql_error_code;
    long long seti_wu_id;
    time_t now;
    int hotpix_update_count;
    int hotpix_insert_count;
    APP_CONFIG sah_config;
    hotpix hotpix;
    list<long> qpixlist;            // will be a unique list of qpixes for
                                    // updating the hotpix table
    list<long>::iterator qpix_i;

    nanotime.tv_sec = 0;
    nanotime.tv_nsec = 1000000;

    // app specific configuration
    if (first_time) {
        first_time = false;
        receiver_cfg.id = 0;
        analysis_cfg.id = 0;
        retval = sah_config.parse_file("..");
        if (retval) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "First entrance to handler : can't parse config file. Exiting.\n"
            );
            return(retval);
        } else {
            retval = db_change(sah_config.scidb_name);
            if (!retval) {
                log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                    "First entrance to handler : could not open science DB %s. Exiting.\n",
                    sah_config.scidb_name
                );
                return(retval);
            } else {
                log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
                    "First entrance to handler : using science DB %s\n",
                    sah_config.scidb_name
                );
            }
        }
        // Sometimes we want to perform all assimilation functions
        // *except* insertion into the science master DB.
        if (noinsert) {
            log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
                "[%s] assimilator is in noinsert mode.\n",
                boinc_wu.name
            );
        }
    } else {
        /*
        retval = db_change(sah_config.scidb_name);
        if (!retval) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "First entrance to handler : could not open science DB %s. Exiting.\n",
                sah_config.scidb_name
            );
            return(retval);
        } else {
            log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
                "First entrance to handler : using science DB %s\n",
                sah_config.scidb_name
            );
        }
        */
    }

    if (noinsert) return 0;
    // Note that this will result in the WU being marked as assimilated -
    // we will not see it again.

    // translate the SETI WU id for those WUs that changed ids during the DB merge
    seti_wu_id = new_wu_id((long long)boinc_wu.opaque);
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] old seti WU id is : %lld new seti WU id is : %lld\n",
        boinc_wu.name, (long long)boinc_wu.opaque, seti_wu_id
    );

    if (boinc_wu.canonical_resultid) {
        log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
            "[%s] Canonical result is %d. SETI workunit ID is %lld.\n",
            boinc_wu.name, boinc_wu.canonical_resultid, seti_wu_id
        );
    } else {
        log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
            "[%s] No canonical result\n",
            boinc_wu.name
        );
    }

    if (!boinc_wu.canonical_resultid) {
        return 0;
        // Note that this will result in the WU being marked as assimilated -
        // we will not see it again. No canonical result means that
        // too many results were returned with no consensus.
    }

    // Obtain and check the full path to the boinc result file.
    retval = get_output_file_path(boinc_canonical_result, path_str);
    if (retval) {
        if (retval == ERR_XML_PARSE) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] Cannot extract filename from canonical result %ld.\n",
                boinc_wu.name, boinc_wu.canonical_resultid
            );
            return(retval);
        } else {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] unknown error from get_output_file_path() for result %ld.\n",
                boinc_wu.name, boinc_wu.canonical_resultid
            );
            return(retval);
        }
    } else {
        path = (char*)path_str.c_str();
        if (!boinc_file_exists(path)) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] Output file %s does not exist for result %ld.\n",
                boinc_wu.name, path, boinc_wu.canonical_resultid
            );
            return(-1);
        } else {
            log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
                "[%s] Result %ld : using upload file %s\n",
                boinc_wu.name, boinc_wu.canonical_resultid, path
            );
        }
    }

    // Open it.
    std::ifstream result_file(path, ios_base::in);
    if (!result_file.is_open()) {
        log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
            "[%s] open error for result file %s : errno %d\n",
            boinc_wu.name, path, errno
        );
        return -1;
    }

    retval = get_science_configs(boinc_wu, seti_wu_id, receiver_cfg, analysis_cfg);
    if (retval) {
        if (retval == 100) {
            return (0);
        } else {
            return (-1);
        }
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] Result %ld : using receiver_cfg %d and analysis_cfg %d\n",
        boinc_wu.name, boinc_wu.canonical_resultid, receiver_cfg.id, analysis_cfg.id
    );

    // Insert a sah result
    retval = populate_seti_result(sah_result, boinc_canonical_result, boinc_wu, seti_wu_id);
    sah_result_id = sah_result.insert();
    if (sah_result_id) {
        log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
            "[%s] Inserted result. Boinc result id is %d. Sah result id is %lld.\n",
            boinc_wu.name, boinc_canonical_result.id, (long long)sah_result_id
        );
    } else {
        if (sql_last_error_code() == -239 || sql_last_error_code() == -268) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] Could not insert duplicate result. SQLCODE is %ld. SQLMSG is %s.\n",
                boinc_wu.name, sql_last_error_code(), sql_error_message()
            );
            return 0;       // non-fatal - we will never see this result again
        } else {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] Could not insert result. SQLCODE is %ld. SQLMSG is %s.\n",
                boinc_wu.name, sql_last_error_code(), sql_error_message()
            );
            return -1;      // fatal - non-dup error
        }
    }

    // Insert all sah signals in turn
    insert_signals(sah_spike, "spike", boinc_wu.name, sah_result_id, result_file,
                   receiver_cfg, boinc_canonical_result.appid, analysis_cfg.max_spikes, qpixlist);
    insert_signals(sah_gaussian, "gaussian", boinc_wu.name, sah_result_id, result_file,
                   receiver_cfg, boinc_canonical_result.appid, analysis_cfg.max_gaussians, qpixlist);
    insert_signals(sah_pulse, "pulse", boinc_wu.name, sah_result_id, result_file,
                   receiver_cfg, boinc_canonical_result.appid, analysis_cfg.max_pulses, qpixlist);
    insert_signals(sah_triplet, "triplet", boinc_wu.name, sah_result_id, result_file,
                   receiver_cfg, boinc_canonical_result.appid, analysis_cfg.max_triplets, qpixlist);

    // update last hit time to now for each qpix hit
    qpixlist.unique();
    hotpix_update_count = 0;
    hotpix_insert_count = 0;
    time(&now);
    for (qpix_i = qpixlist.begin(); qpix_i != qpixlist.end(); qpix_i++) {
        if (hotpix.fetch(*qpix_i)) {
            hotpix.last_hit_time = now;
            hotpix.update();
            hotpix_update_count++;
        } else {
            hotpix.id = *qpix_i;
            hotpix.last_hit_time = now;
            hotpix.insert(*qpix_i);
            hotpix_insert_count++;
        }
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] Updated %d rows and inserted %d rows in the hotpix table\n",
        boinc_wu.name, hotpix_update_count, hotpix_insert_count
    );

    return 0;   // the successful assimilation of one WU
}
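// Side note on the qpixlist handling above: std::list::unique() only removes
// *adjacent* equal elements, so the usual idiom is to sort the list first.
// A minimal, self-contained sketch of that idiom (illustrative only, not part
// of the original source; the pixel ids below are made up):
#include <iostream>
#include <list>

void demo_unique_qpix() {
    std::list<long> qpixlist = {912, 17, 912, 17, 17, 404};
    qpixlist.sort();      // group equal pixel ids next to each other
    qpixlist.unique();    // now every id appears exactly once: 17, 404, 912
    for (long qpix : qpixlist) {
        std::cout << qpix << " ";
    }
    std::cout << "\n";
}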
int main(int argc, char* argv[])
{
    ApplicationsLib::LogogSetup logo_setup;

    TCLAP::CmdLine cmd(
        "Integrates the given element property and outputs an OGS-5 direct "
        "Neumann boundary condition. The mesh has to contain a property "
        "'bulk_node_ids' that stores the original subsurface "
        "mesh node ids. Such surface meshes can be created using the OGS-6 "
        "tool ExtractSurface.\n\n"
        "OpenGeoSys-6 software, version " +
            BaseLib::BuildInfo::git_describe +
            ".\n"
            "Copyright (c) 2012-2019, OpenGeoSys Community "
            "(http://www.opengeosys.org)",
        ' ', BaseLib::BuildInfo::git_describe);

    TCLAP::ValueArg<std::string> in_mesh(
        "i", "in-mesh",
        "the surface mesh that has an element property for the Neumann "
        "boundary condition",
        true, "", "filename for surface mesh input");
    cmd.add(in_mesh);

    TCLAP::ValueArg<std::string> property_in_arg(
        "p", "property-in-name",
        "name of an element property used for the computation of the "
        "Neumann boundary condition",
        true, "", "string (property name)");
    cmd.add(property_in_arg);

    TCLAP::ValueArg<std::string> property_out_arg(
        "", "property-out-name",
        "name of the node based property used for the output of the "
        "Neumann boundary condition",
        true, "", "string (property name)");
    cmd.add(property_out_arg);

    TCLAP::ValueArg<std::string> result_file(
        "o", "result-out",
        "the file name the result will be written to",
        true, "", "output file name");
    cmd.add(result_file);

    cmd.parse(argc, argv);

    // read surface mesh
    std::unique_ptr<MeshLib::Mesh> surface_mesh(
        MeshLib::IO::readMeshFromFile(in_mesh.getValue()));

    auto const* const node_id_pv =
        [&]() -> MeshLib::PropertyVector<std::size_t>* {
        try
        {
            return surface_mesh->getProperties().getPropertyVector<std::size_t>(
                "bulk_node_ids", MeshLib::MeshItemType::Node, 1);
        }
        catch (std::runtime_error const& e)
        {
            WARN("%s", e.what());
            return nullptr;
        }
    }();
    if (!node_id_pv)
        return EXIT_FAILURE;

    std::vector<double> integrated_values = getSurfaceIntegratedValuesForNodes(
        *surface_mesh, property_in_arg.getValue());
    std::vector<std::pair<std::size_t, double>> direct_values;
    direct_values.reserve(surface_mesh->getNumberOfNodes());

    for (auto const* node : surface_mesh->getNodes())
    {
        auto const id(node->getID());
        auto const subsurface_node_id((*node_id_pv)[id]);
        auto const val(integrated_values[id]);
        direct_values.push_back(std::make_pair(subsurface_node_id, val));
    }

    auto* const pv =
        surface_mesh->getProperties().createNewPropertyVector<double>(
            property_out_arg.getValue(), MeshLib::MeshItemType::Node, 1);
    pv->resize(surface_mesh->getNodes().size());
    for (std::size_t k(0); k < surface_mesh->getNodes().size(); ++k)
    {
        (*pv)[k] = direct_values[k].second;
    }

    MeshLib::IO::writeMeshToFile(*surface_mesh, result_file.getValue());

    std::ofstream result_out(result_file.getValue() + ".txt");
    result_out.precision(std::numeric_limits<double>::digits10);
    for (auto const& p : direct_values)
        result_out << p.first << " " << p.second << "\n";

    return EXIT_SUCCESS;
}
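// Example invocation of this tool (a sketch: the binary name and file names
// below are hypothetical; the flags are the TCLAP arguments defined above).
// The run writes the mesh with the new node property to the -o file and a
// plain-text "<bulk_node_id> <value>" list to the same name with ".txt"
// appended:
//
//   ./CreateNeumannBc -i surface_mesh.vtu -p flux \
//       --property-out-name node_flux -o neumann_bc.vtu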
void PepNovoOutfile::load(
  const std::string& result_filename,
  vector<PeptideIdentification>& peptide_identifications,
  ProteinIdentification& protein_identification,
  const double& score_threshold,
  const IndexPosMappingType& index_to_precursor,
  const map<String, String>& pnovo_modkey_to_mod_id)
{
  // generally used variables
  StringList substrings;
  map<String, Int> columns;
  PeptideHit peptide_hit;

  String line, score_type = "PepNovo", version = "unknown", identifier, filename,
         sequence, sequence_with_mods;

  DateTime datetime = DateTime::now();  // there's no date given from PepNovo
  protein_identification.setDateTime(datetime);

  peptide_identifications.clear();
  PeptideIdentification peptide_identification;
  protein_identification = ProteinIdentification();

  // open the result
  ifstream result_file(result_filename.c_str());
  if (!result_file) {
    throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, result_filename);
  }

  Size line_number(0);  // used to report in which line an error occurred
  Size id_count(0);     // number of IDs seen (not necessarily the ones finally returned)

  getSearchEngineAndVersion(result_filename, protein_identification);
  // if information could not be retrieved from the outfile use defaults
  if (protein_identification.getSearchEngineVersion().empty()) {
    protein_identification.setSearchEngine("PepNovo");
    protein_identification.setSearchEngineVersion(version);
  }
  identifier = protein_identification.getSearchEngine() + "_" + datetime.getDate();
  protein_identification.setIdentifier(identifier);

  map<String, String> mod_mask_map;
  const vector<String>& mods = protein_identification.getSearchParameters().variable_modifications;
  for (vector<String>::const_iterator mod_it = mods.begin(); mod_it != mods.end(); ++mod_it) {
    if (mod_it->empty()) continue;
    //cout<<*mod_it<<endl;
    if (pnovo_modkey_to_mod_id.find(*mod_it) != pnovo_modkey_to_mod_id.end()) {
      //cout<<keys_to_id.find(*mod_it)->second<<endl;
      ResidueModification tmp_mod =
        ModificationsDB::getInstance()->getModification(pnovo_modkey_to_mod_id.find(*mod_it)->second);
      if (mod_it->prefix(1) == "^" || mod_it->prefix(1) == "$") {
        mod_mask_map[*mod_it] = "(" + tmp_mod.getId() + ")";
      } else {
        mod_mask_map[*mod_it] = String(tmp_mod.getOrigin()) + "(" + tmp_mod.getId() + ")";
      }
    } else {
      if (mod_it->prefix(1) != "^" && mod_it->prefix(1) != "$") {
        mod_mask_map[*mod_it] = mod_it->prefix(1) + "[" + mod_it->substr(1) + "]";
        //cout<<mod_mask_map[*mod_it]<<endl;
      } else {
        mod_mask_map[*mod_it] = "[" + *mod_it + "]";
        //cout<<mod_mask_map[*mod_it]<<endl;
      }
    }
  }

  Size index;
  while (getline(result_file, line)) {
    if (!line.empty() && (line[line.length() - 1] < 33)) {
      line.resize(line.length() - 1);  // remove weird EOL character
    }
    line.trim();
    ++line_number;

    if (line.hasPrefix(">> ")) {  // >> 1 /home/shared/pepnovo/4611_raw_ms2_picked.mzXML.1001.2.dta
      ++id_count;
      if (!peptide_identification.empty() && !peptide_identification.getHits().empty()) {
        peptide_identifications.push_back(peptide_identification);
      }

      line.split(' ', substrings);
      //String index = File::basename(line.substr(line.find(' ', strlen(">> ")) + 1));
      if (substrings.size() < 3) {
        throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
          "Not enough columns (spectrum Id) in file in line " + String(line_number) +
          String(" (should be 2 or more)!"), result_filename);
      }
      try {
        index = substrings[2].trim().toInt();
      }
      catch (...) {
        throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
          "Expected an index number in line " + String(line_number) +
          String(" at position 2 (line was: '" + line + "')!"), result_filename);
      }
      //cout<<"INDEX: "<<index<<endl;

      peptide_identification = PeptideIdentification();
      bool success = false;
      if (index_to_precursor.size() > 0) {
        if (index_to_precursor.find(index) != index_to_precursor.end()) {
          peptide_identification.setRT(index_to_precursor.find(index)->second.first);
          peptide_identification.setMZ(index_to_precursor.find(index)->second.second);
          success = true;
        } else {
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
            "Index '" + String(index) + String("' in line '" + line +
            "' not found in index table (line was: '" + line + "')!"), result_filename);
        }
      }

      if (!success) {
        // try to reconstruct from title entry (usually sensible when MGF is supplied to PepNovo)
        try {
          if (substrings.size() >= 4) {
            StringList parts = ListUtils::create<String>(substrings[3], '_');
            if (parts.size() >= 2) {
              peptide_identification.setRT(parts[1].toDouble());
              peptide_identification.setMZ(parts[0].toDouble());
              success = true;
            }
          }
        }
        catch (...) {
        }

        if (!success) {
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
            "Precursor could not be reconstructed from title '" + substrings[3] +
            String("' in line '" + line + "' (line was: '" + line + "')!"), result_filename);
        }
      }
      peptide_identification.setSignificanceThreshold(score_threshold);
      peptide_identification.setScoreType(score_type);
      peptide_identification.setIdentifier(identifier);
    }
    else if (line.hasPrefix("#Index")) {  // #Index Prob Score N-mass C-Mass [M+H] Charge Sequence
      if (columns.empty()) {  // map the column names to their column number
        line.split('\t', substrings);
        for (vector<String>::const_iterator s_i = substrings.begin(); s_i != substrings.end(); ++s_i) {
          if ((*s_i) == "#Index") columns["Index"] = s_i - substrings.begin();
          else if ((*s_i) == "RnkScr") columns["RnkScr"] = s_i - substrings.begin();
          else if ((*s_i) == "PnvScr") columns["PnvScr"] = s_i - substrings.begin();
          else if ((*s_i) == "N-Gap") columns["N-Gap"] = s_i - substrings.begin();
          else if ((*s_i) == "C-Gap") columns["C-Gap"] = s_i - substrings.begin();
          else if ((*s_i) == "[M+H]") columns["[M+H]"] = s_i - substrings.begin();
          else if ((*s_i) == "Charge") columns["Charge"] = s_i - substrings.begin();
          else if ((*s_i) == "Sequence") columns["Sequence"] = s_i - substrings.begin();
        }

        if (columns.size() != 8) {
          result_file.close();
          result_file.clear();
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
            "Not enough columns in file in line " + String(line_number) +
            String(" (should be 8)!"), result_filename);
        }
      }

      while (getline(result_file, line)) {
        ++line_number;
        if (!line.empty() && (line[line.length() - 1] < 33)) {
          line.resize(line.length() - 1);
        }
        line.trim();
        if (line.empty()) break;

        line.split('\t', substrings);
        if (!substrings.empty()) {
          if (substrings.size() != 8) {
            result_file.close();
            result_file.clear();
            throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
              "Not enough columns in file in line " + String(line_number) +
              String(" (should be 8)!"), result_filename);
          }
          if (substrings[columns["RnkScr"]].toFloat() >= score_threshold) {
            peptide_hit = PeptideHit();
            peptide_hit.setCharge(substrings[columns["Charge"]].toInt());
            peptide_hit.setRank(substrings[columns["Index"]].toInt() + 1);
            peptide_hit.setScore(substrings[columns["RnkScr"]].toFloat());
            peptide_hit.setMetaValue("PnvScr", substrings[columns["PnvScr"]].toFloat());
            peptide_hit.setMetaValue("N-Gap", substrings[columns["N-Gap"]].toFloat());
            peptide_hit.setMetaValue("C-Gap", substrings[columns["C-Gap"]].toFloat());
            peptide_hit.setMetaValue("MZ", substrings[columns["[M+H]"]].toFloat());
            sequence = substrings[columns["Sequence"]];

            for (map<String, String>::iterator mask_it = mod_mask_map.begin(); mask_it != mod_mask_map.end(); ++mask_it) {
              if (mask_it->first.hasPrefix("^") && sequence.hasSubstring(mask_it->first)) {
                sequence.substitute(mask_it->first, "");
                sequence = mask_it->second + sequence;
              }
              //cout<<mask_it->first<<" "<<mask_it->second<<endl;
              sequence.substitute(mask_it->first, mask_it->second);
            }
            peptide_hit.setSequence(AASequence::fromString(sequence));
            peptide_identification.insertHit(peptide_hit);
          }
        }
      }
    }
  }

  if (!peptide_identifications.empty() || !peptide_identification.getHits().empty()) {
    peptide_identifications.push_back(peptide_identification);
  }

  result_file.close();
  result_file.clear();

  LOG_INFO << "Parsed " << id_count << " ids, retained "
           << peptide_identifications.size() << "." << std::endl;
}