예제 #1
0
// assimilate_handler() is called by BOINC code and is passed the canonical
// result for a workunit.  assimilate_handler() reads the referenced result
// file and inserts the result and its signals into the master science DB.
// BOINC also passes the workunit (as it appears in the BOINC DB) and a vector
// containing all results (including the canonical one) for that workunit.
// We use the workunit to determine if there is an error condition.
//
// Parameters:
//   boinc_wu               - the workunit as stored in the BOINC DB.
//   boinc_results          - all results for the workunit (not read here).
//   boinc_canonical_result - the canonical result whose upload file is parsed.
//
// Return value:
//   0        - nothing more to do for this workunit: it was assimilated, or
//              noinsert mode is on, or there is no canonical result, or the
//              result was already in the science DB, or get_science_configs()
//              returned 100.  Returning 0 results in the WU being marked as
//              assimilated - we will not see it again.
//   non-zero - failure (config parse error, science DB could not be opened,
//              output file missing/unreadable, non-duplicate insert error).
int assimilate_handler(
    WORKUNIT& boinc_wu, vector<RESULT>& boinc_results, RESULT& boinc_canonical_result
) {
    int retval = 0;
    static receiver_config receiver_cfg;
    static analysis_config analysis_cfg;
    // NOTE(review): s_wu and s_wu_grp are never used below; they are kept only
    // because their constructors are not visible from here.
    workunit s_wu;
    workunit_grp s_wu_grp;
    result sah_result;
    spike sah_spike;
    gaussian sah_gaussian;
    pulse sah_pulse;
    triplet sah_triplet;
    std::string path_str;
    long sah_result_id;
    static bool first_time = true;
    long long seti_wu_id;
    time_t now;
    int hotpix_update_count;
    int hotpix_insert_count;

    APP_CONFIG sah_config;

    hotpix hotpix;
    list<long> qpixlist;            // collects the qpixes hit by the inserted
                                    // signals; de-duplicated before updating
                                    // the hotpix table
    list<long>::iterator qpix_i;

    // nanotime is not declared in this function (presumably a file-level
    // timespec - confirm); it is reset to 1,000,000 ns on every call.
    nanotime.tv_sec = 0;
    nanotime.tv_nsec = 1000000;

    // app specific configuration - done once, on the first successful entry
    if (first_time) {
        receiver_cfg.id = 0;
        analysis_cfg.id = 0;

        retval = sah_config.parse_file("..");
        if (retval) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "First entrance to handler : can't parse config file. Exiting.\n"
            );
            return(retval);
        }

        // db_change() reports failure with a zero return value, so retval
        // itself must not be returned here: 0 would tell the caller that the
        // workunit was assimilated.
        retval = db_change(sah_config.scidb_name);
        if (!retval) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "First entrance to handler : could not open science DB %s. Exiting.\n",
                sah_config.scidb_name
            );
            return -1;
        }
        log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
            "First entrance to handler : using science DB %s\n",
            sah_config.scidb_name
        );

        // Only now is the one-time setup complete; a failed attempt above
        // leaves first_time set so the setup is retried on the next call.
        first_time = false;

        // Sometimes we want to perform all assimilation functions
        // *except* insertion into the science master DB.
        if (noinsert) {
            log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
                "[%s] assimilator is in noinsert mode.\n",
                boinc_wu.name
            );
        }
    }
    // On later entries the science DB is not re-opened (a db_change() call
    // for that case used to live here but was disabled).

    if (noinsert) return 0;     // Note that this will result in the WU being marked as assimilated -
                                // we will not see it again.

    // translate seti wuid for those wus that changed ids during the DB merge
    seti_wu_id = new_wu_id((long long)boinc_wu.opaque);
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] old seti WU id is : %lld  new seti WU id is : %lld\n",
        boinc_wu.name, (long long)boinc_wu.opaque, seti_wu_id
    );

    // The id is printed in several messages; convert it once so every format
    // specifier below (%ld) matches its argument whatever the field's width.
    const long canonical_id = static_cast<long>(boinc_wu.canonical_resultid);

    if (!boinc_wu.canonical_resultid) {
        log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
            "[%s] No canonical result\n", boinc_wu.name
        );
        return 0;               // Note that this will result in the WU being marked as assimilated -
                                // we will not see it again.  No canonical result means that
                                // too many results were returned with no consensus.
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] Canonical result is %ld.  SETI workunit ID is %lld.\n",
        boinc_wu.name, canonical_id, seti_wu_id
    );

    // Obtain and check the full path to the boinc result file.
    retval = get_output_file_path(boinc_canonical_result, path_str);
    if (retval) {
        if (retval == ERR_XML_PARSE) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] Cannot extract filename from canonical result %ld.\n",
                boinc_wu.name, canonical_id);
        } else {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] unknown error from get_output_file_path() for result %ld.\n",
                boinc_wu.name, canonical_id);
        }
        return(retval);
    }

    const char* path = path_str.c_str();
    if (!boinc_file_exists(path)) {
        log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
            "[%s] Output file %s does not exist for result %ld.\n",
            boinc_wu.name, path, canonical_id);
        return(-1);
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] Result %ld : using upload file %s\n",
        boinc_wu.name, canonical_id, path);

    // Open it.
    std::ifstream result_file(path, ios_base::in);
    if (!result_file.is_open()) {
        log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
            "[%s] open error for result file %s : errno %d\n",
            boinc_wu.name, path, errno
        );
        return -1;
    }

    // A return of 100 from get_science_configs() is treated as "skip this
    // workunit without error"; any other non-zero value is a failure.
    retval = get_science_configs(boinc_wu, seti_wu_id, receiver_cfg, analysis_cfg);
    if (retval) {
        return (retval == 100) ? 0 : -1;
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] Result %ld : using receiver_cfg %d and analysis_cfg %d\n",
        boinc_wu.name, canonical_id, receiver_cfg.id, analysis_cfg.id);

    // Insert a sah result
    // NOTE(review): the value returned by populate_seti_result() has never
    // been checked here; confirm whether a non-zero value should abort.
    retval = populate_seti_result(sah_result, boinc_canonical_result, boinc_wu, seti_wu_id);
    sah_result_id = sah_result.insert();
    if (!sah_result_id) {
        const long sql_code = static_cast<long>(sql_last_error_code());
        // -239 and -268 are the SQL codes this handler treats as "row already
        // present".
        if (sql_code == -239 || sql_code == -268) {
            log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
                "[%s] Could not insert duplicate result.  SQLCODE is %ld.  SQLMSG is %s.\n",
                boinc_wu.name, sql_code, sql_error_message()
            );
            return 0;       // non-fatal - we will never see this result again
        }
        log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
            "[%s] Could not insert result.  SQLCODE is %ld.  SQLMSG is %s.\n",
            boinc_wu.name, sql_code, sql_error_message()
        );
        return -1;          // fatal - non-dup error
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
        "[%s] Inserted result.  Boinc result id is %d.  Sah result id is %lld.\n",
        boinc_wu.name, boinc_canonical_result.id,
        (long long)sah_result_id
    );

    // Insert all sah signals in turn.  The calls share result_file, so their
    // order (spike, gaussian, pulse, triplet) is kept as it was.
    insert_signals( sah_spike,
                    "spike",
                    boinc_wu.name,
                    sah_result_id,
                    result_file,
                    receiver_cfg,
                    boinc_canonical_result.appid,
                    analysis_cfg.max_spikes,
                    qpixlist);

    insert_signals( sah_gaussian,
                    "gaussian",
                    boinc_wu.name,
                    sah_result_id,
                    result_file,
                    receiver_cfg,
                    boinc_canonical_result.appid,
                    analysis_cfg.max_gaussians,
                    qpixlist);

    insert_signals( sah_pulse,
                    "pulse",
                    boinc_wu.name,
                    sah_result_id,
                    result_file,
                    receiver_cfg,
                    boinc_canonical_result.appid,
                    analysis_cfg.max_pulses,
                    qpixlist);

    insert_signals( sah_triplet,
                    "triplet",
                    boinc_wu.name,
                    sah_result_id,
                    result_file,
                    receiver_cfg,
                    boinc_canonical_result.appid,
                    analysis_cfg.max_triplets,
                    qpixlist);

    // update last hit time to now for each qpix hit
    // list::unique() only collapses *adjacent* equal elements, so the list is
    // sorted first; otherwise a qpix hit by several signal types would be
    // written to the hotpix table more than once.
    qpixlist.sort();
    qpixlist.unique();
    hotpix_update_count = 0;
    hotpix_insert_count = 0;
    time(&now);
    for (qpix_i = qpixlist.begin(); qpix_i != qpixlist.end(); ++qpix_i) {
        if (hotpix.fetch(*qpix_i)) {
            // row already exists: just refresh its timestamp
            hotpix.last_hit_time = now;
            hotpix.update();
            hotpix_update_count++;
        } else {
            hotpix.id = *qpix_i;
            hotpix.last_hit_time = now;
            hotpix.insert(*qpix_i);
            hotpix_insert_count++;
        }
    }
    log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
        "[%s] Updated %d rows and inserted %d rows in the hotpix table\n",
        boinc_wu.name, hotpix_update_count, hotpix_insert_count
    );

    return 0;   // the successful assimilation of one WU
}
예제 #2
0
int main(int argc, char* argv[])
{
    ApplicationsLib::LogogSetup logo_setup;

    TCLAP::CmdLine cmd(
        "Integrates the given element property and outputs an OGS-5 direct "
        "Neumann boundary condition. The mesh has to contain a property "
        "'bulk_node_ids' that stores the original subsurface "
        "mesh node ids. Such surface meshes can be created using the OGS-6 "
        "tool ExtractSurface.\n\n"
        "OpenGeoSys-6 software, version " +
            BaseLib::BuildInfo::git_describe +
            ".\n"
            "Copyright (c) 2012-2019, OpenGeoSys Community "
            "(http://www.opengeosys.org)",
        ' ', BaseLib::BuildInfo::git_describe);

    TCLAP::ValueArg<std::string> in_mesh("i",
                                         "in-mesh",
                                         "the surface mesh that has an element "
                                         "property for the Neumann "
                                         "boundary condition",
                                         true,
                                         "",
                                         "filename for surface mesh input");
    cmd.add(in_mesh);

    TCLAP::ValueArg<std::string> property_in_arg(
        "p",
        "property-in-name",
        "name of an element property used for the computation of the "
        "Neumann boundary condition",
        true,
        "",
        "string (property name)");
    cmd.add(property_in_arg);

    TCLAP::ValueArg<std::string> property_out_arg(
        "",
        "property-out-name",
        "name of the node based property used for the output of the "
        "Neumann boundary condition",
        true,
        "",
        "string (property name)");
    cmd.add(property_out_arg);

    TCLAP::ValueArg<std::string> result_file(
        "o",
        "result-out",
        "the file name the result will be written to ",
        true,
        "",
        "output file name");
    cmd.add(result_file);
    cmd.parse( argc, argv );

    // read surface mesh
    std::unique_ptr<MeshLib::Mesh> surface_mesh(
        MeshLib::IO::readMeshFromFile(in_mesh.getValue()));

    auto const* const node_id_pv =
        [&]() -> MeshLib::PropertyVector<std::size_t>* {
        try
        {
            return surface_mesh->getProperties().getPropertyVector<std::size_t>(
                "bulk_node_ids", MeshLib::MeshItemType::Node, 1);
        }
        catch (std::runtime_error const& e)
        {
            WARN("%s", e.what());
            return nullptr;
        }
    }();
    if (!node_id_pv)
        return EXIT_FAILURE;

    std::vector<double> integrated_values = getSurfaceIntegratedValuesForNodes(
        *surface_mesh, property_in_arg.getValue());
    std::vector<std::pair<std::size_t, double>> direct_values;
    direct_values.reserve(surface_mesh->getNumberOfNodes());

    for (auto const* node : surface_mesh->getNodes())
    {
        auto const id(node->getID());
        auto const subsurface_node_id((*node_id_pv)[id]);
        auto const val(integrated_values[id]);
        direct_values.push_back(std::make_pair(subsurface_node_id, val));
    }

    auto* const pv =
        surface_mesh->getProperties().createNewPropertyVector<double>(
            property_out_arg.getValue(), MeshLib::MeshItemType::Node, 1);
    pv->resize(surface_mesh->getNodes().size());
    for (std::size_t k(0); k < surface_mesh->getNodes().size(); ++k)
    {
        (*pv)[k] = direct_values[k].second;
    }

    MeshLib::IO::writeMeshToFile(*surface_mesh, result_file.getValue());

    std::ofstream result_out(result_file.getValue() + ".txt");
    result_out.precision(std::numeric_limits<double>::digits10);
    for (auto const& p : direct_values)
        result_out << p.first << " " << p.second << "\n";

    return EXIT_SUCCESS;
}
예제 #3
0
  void
  PepNovoOutfile::load(
    const std::string & result_filename,
    vector<PeptideIdentification> & peptide_identifications,
    ProteinIdentification & protein_identification,
    const double & score_threshold,
    const IndexPosMappingType & index_to_precursor,
    const map<String, String> & pnovo_modkey_to_mod_id
    )
  {
    // generally used variables
    StringList substrings;
    map<String, Int> columns;
    PeptideHit peptide_hit;

    String
      line,
      score_type = "PepNovo",
      version = "unknown",
      identifier,
      filename,
      sequence,
      sequence_with_mods;

    DateTime datetime = DateTime::now();     // there's no date given from PepNovo
    protein_identification.setDateTime(datetime);

    peptide_identifications.clear();
    PeptideIdentification peptide_identification;
    protein_identification = ProteinIdentification();

    // open the result
    ifstream result_file(result_filename.c_str());
    if (!result_file)
    {
      throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, result_filename);
    }

    Size line_number(0);     // used to report in which line an error occurred
    Size id_count(0);        // number of IDs seen (not necessarily the ones finally returned)

    getSearchEngineAndVersion(result_filename, protein_identification);
    //if information could not be retrieved from the outfile use defaults
    if (protein_identification.getSearchEngineVersion().empty())
    {
      protein_identification.setSearchEngine("PepNovo");
      protein_identification.setSearchEngineVersion(version);
    }
    identifier = protein_identification.getSearchEngine() + "_" + datetime.getDate();
    protein_identification.setIdentifier(identifier);

    map<String, String> mod_mask_map;
    const vector<String> & mods = protein_identification.getSearchParameters().variable_modifications;
    for (vector<String>::const_iterator mod_it = mods.begin(); mod_it != mods.end(); ++mod_it)
    {
      if (mod_it->empty())
        continue;
      //cout<<*mod_it<<endl;
      if (pnovo_modkey_to_mod_id.find(*mod_it) != pnovo_modkey_to_mod_id.end())
      {
        //cout<<keys_to_id.find(*mod_it)->second<<endl;
        ResidueModification tmp_mod = ModificationsDB::getInstance()->getModification(pnovo_modkey_to_mod_id.find(*mod_it)->second);
        if (mod_it->prefix(1) == "^" || mod_it->prefix(1) == "$")
        {
          mod_mask_map[*mod_it] = "(" + tmp_mod.getId() + ")";
        }
        else
        {
          mod_mask_map[*mod_it] = String(tmp_mod.getOrigin()) + "(" + tmp_mod.getId() + ")";
        }
      }
      else
      {
        if (mod_it->prefix(1) != "^" && mod_it->prefix(1) != "$")
        {
          mod_mask_map[*mod_it] = mod_it->prefix(1) + "[" + mod_it->substr(1) + "]";
          //cout<<mod_mask_map[*mod_it]<<endl;
        }
        else
        {
          mod_mask_map[*mod_it] = "[" + *mod_it + "]";
          //cout<<mod_mask_map[*mod_it]<<endl;
        }
      }
    }


    Size index;
    while (getline(result_file, line))
    {
      if (!line.empty() && (line[line.length() - 1] < 33)) line.resize(line.length() - 1); // remove weird EOL character
      line.trim();
      ++line_number;
      if (line.hasPrefix(">> "))         // >> 1 /home/shared/pepnovo/4611_raw_ms2_picked.mzXML.1001.2.dta
      {
        ++id_count;
        if (!peptide_identification.empty() && !peptide_identification.getHits().empty())
        {
          peptide_identifications.push_back(peptide_identification);
        }

        line.split(' ', substrings);
        //String index = File::basename(line.substr(line.find(' ', strlen(">> ")) + 1));
        if (substrings.size() < 3)
        {
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns (spectrum Id) in file in line " + String(line_number) + String(" (should be 2 or more)!"), result_filename);
        }

        try
        {
          index = substrings[2].trim().toInt();
        }
        catch (...)
        {
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Expected an index number in line " + String(line_number) + String(" at position 2 (line was: '" + line + "')!"), result_filename);
        }

        //cout<<"INDEX: "<<index<<endl;
        peptide_identification = PeptideIdentification();
        bool success = false;
        if (index_to_precursor.size()>0)
        {
          if (index_to_precursor.find(index) != index_to_precursor.end())
          {
            peptide_identification.setRT(index_to_precursor.find(index)->second.first);
            peptide_identification.setMZ(index_to_precursor.find(index)->second.second);
            success = true;
          }
          else throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Index '" + String(index) + String("' in line '" + line + "' not found in index table (line was: '" + line + "')!"), result_filename);
        }

        if (!success)
        { // try to reconstruct from title entry (usually sensible when MGF is supplied to PepNovo)
          try
          {
            if (substrings.size() >= 4)
            {
              StringList parts = ListUtils::create<String>(substrings[3], '_');
              if (parts.size() >= 2)
              {
                peptide_identification.setRT(parts[1].toDouble());
                peptide_identification.setMZ(parts[0].toDouble());
                success = true;
              }
            }
          }
          catch (...)
          {

          }
          if (!success) throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Precursor could not be reconstructed from title '" + substrings[3] + String("' in line '" + line + "' (line was: '" + line + "')!"), result_filename);
        }
        peptide_identification.setSignificanceThreshold(score_threshold);
        peptide_identification.setScoreType(score_type);
        peptide_identification.setIdentifier(identifier);
      }
      else if (line.hasPrefix("#Index"))         // #Index  Prob    Score   N-mass  C-Mass  [M+H]   Charge  Sequence
      {
        if (columns.empty())           // map the column names to their column number
        {
          line.split('\t', substrings);
          for (vector<String>::const_iterator s_i = substrings.begin(); s_i != substrings.end(); ++s_i)
          {
            if ((*s_i) == "#Index")
              columns["Index"] = s_i - substrings.begin();
            else if ((*s_i) == "RnkScr")
              columns["RnkScr"] = s_i - substrings.begin();
            else if ((*s_i) == "PnvScr")
              columns["PnvScr"] = s_i - substrings.begin();
            else if ((*s_i) == "N-Gap")
              columns["N-Gap"] = s_i - substrings.begin();
            else if ((*s_i) == "C-Gap")
              columns["C-Gap"] = s_i - substrings.begin();
            else if ((*s_i) == "[M+H]")
              columns["[M+H]"] = s_i - substrings.begin();
            else if ((*s_i) == "Charge")
              columns["Charge"] = s_i - substrings.begin();
            else if ((*s_i) == "Sequence")
              columns["Sequence"] = s_i - substrings.begin();
          }

          if (columns.size() != 8)
          {
            result_file.close();
            result_file.clear();
            throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns in file in line " + String(line_number) + String(" (should be 8)!"), result_filename);
          }
        }
        while (getline(result_file, line))
        {
          ++line_number;
          if (!line.empty() && (line[line.length() - 1] < 33))
            line.resize(line.length() - 1);
          line.trim();

          if (line.empty())
            break;

          line.split('\t', substrings);
          if (!substrings.empty())
          {
            if (substrings.size() != 8)
            {
              result_file.close();
              result_file.clear();
              throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns in file in line " + String(line_number) + String(" (should be 8)!"), result_filename);
            }
            if (substrings[columns["RnkScr"]].toFloat() >= score_threshold)
            {
              peptide_hit = PeptideHit();
              peptide_hit.setCharge(substrings[columns["Charge"]].toInt());
              peptide_hit.setRank(substrings[columns["Index"]].toInt() + 1);
              peptide_hit.setScore(substrings[columns["RnkScr"]].toFloat());
              peptide_hit.setMetaValue("PnvScr", substrings[columns["PnvScr"]].toFloat());
              peptide_hit.setMetaValue("N-Gap", substrings[columns["N-Gap"]].toFloat());
              peptide_hit.setMetaValue("C-Gap", substrings[columns["C-Gap"]].toFloat());
              peptide_hit.setMetaValue("MZ", substrings[columns["[M+H]"]].toFloat());
              sequence = substrings[columns["Sequence"]];


              for (map<String, String>::iterator mask_it = mod_mask_map.begin(); mask_it != mod_mask_map.end(); ++mask_it)
              {
                if (mask_it->first.hasPrefix("^") && sequence.hasSubstring(mask_it->first))
                {
                  sequence.substitute(mask_it->first, "");
                  sequence = mask_it->second + sequence;
                }
                //cout<<mask_it->first<<" "<<mask_it->second<<endl;
                sequence.substitute(mask_it->first, mask_it->second);
              }
              peptide_hit.setSequence(AASequence::fromString(sequence));
              peptide_identification.insertHit(peptide_hit);
            }
          }
        }
      }
    }
    if (!peptide_identifications.empty() || !peptide_identification.getHits().empty())
    {
      peptide_identifications.push_back(peptide_identification);
    }

    result_file.close();
    result_file.clear();

    LOG_INFO << "Parsed " << id_count << " ids, retained " << peptide_identifications.size() << "." << std::endl;

  }