int main( int argc, char** argv ) { #ifdef WIN32 BOOL console_ok = AllocConsole(); freopen("CONOUT$", "wb", stdout); freopen("CONOUT$", "wb", stderr); //freopen( "console.txt", "wb", stdout); //freopen( "console.txt", "wb", stderr); printf("testing stdout\n"); fprintf(stderr, "testing stderr\n"); #endif try { fc::tcp_server _tcp_serv; //maps keyhoteeId -> founderCode,points,publicKey bts::db::level_map<std::string,record> _known_names; _known_names.open( "reg_db" ); auto fix_itr = _known_names.begin(); while (fix_itr.valid()) { std::string kid = fix_itr.key(); std::string asciiName; convertToAscii(kid,&asciiName); if (kid != asciiName) { auto unchanged_record = fix_itr.value(); deb << kid << " to " << asciiName << std::endl; _known_names.remove(kid); _known_names.store(asciiName,unchanged_record); } ++fix_itr; } if (argc == 3) { //update records in goood dbase with matching records from messy database std::cerr << "update records with records from messy database" << std::endl; bts::db::level_map<std::string,record> _messy_names; _messy_names.open( "messy_db" ); //walkthrough all names in messydb, see if it matches record in good db, update good db with public key if so auto itr = _messy_names.begin(); while( itr.valid() ) { auto found_itr = _known_names.find( itr.key() ); if (found_itr.valid()) { auto id_record = itr.value(); auto found_record = found_itr.value(); found_record.pub_key = id_record.pub_key; ilog( "${key} => ${value}", ("key",itr.key())("value",found_record)); _known_names.store( itr.key(), found_record); } else //report couldn't be found in debug.txt { std::string lower_kid = itr.key(); boost::to_lower(lower_kid); found_itr = _known_names.find(lower_kid); if (found_itr.valid()) deb << "found " << itr.key() << " as " << lower_kid << std::endl; else deb << "missing " << itr.key() << std::endl; } ++itr; } } // TODO: import CSV list of new keyhoteeIds that can be registered else if( argc == 2 ) { FC_ASSERT( fc::exists(argv[1]) ); std::ifstream in(argv[1]); std::string line; std::getline(in, line); int num_commas = std::count(line.begin(), line.end(), ','); deb << "num_commas=" << num_commas << "\n"; std::cerr << "num_commas=" << num_commas << "\n"; if (num_commas == 1) { //fix badly transcribed keyhoteeIDs (replace 1st column names with 2nd column names) while( in.good() ) { std::stringstream ss(line); std::string original_name; //old keyhoteeId std::getline( ss, original_name, ',' ); std::string name; convertToAscii(original_name,&name); std::string original_new_name; //old keyhoteeId std::getline(ss, original_new_name); std::string new_name; convertToAscii(original_new_name,&new_name); try { auto itr = _known_names.find( name ); if (itr.valid()) { deb << "found " << name << " replacing with " << new_name << std::endl; auto rec = itr.value(); rec.key = new_name; _known_names.store( new_name, rec ); } else { deb << name << " NOT FOUND when trying to replace" << std::endl; } } catch (...) { deb << "Couldn't find name " << name << std::endl; } } deb << "FINISHED replacing bad KIDs" << std::endl; deb.flush(); } else if (num_commas == 2 || num_commas == 3) { while( in.good() ) { std::stringstream ss(line); std::string oname; //keyhoteeId std::getline( ss, oname, ',' ); std::string name; convertToAscii(oname,&name); //boost::to_lower(name); std::string key; //founderCode std::getline( ss, key, ',' ); std::string points; std::getline( ss, points, ',' ); deb << "OK"<< std::endl; try { auto itr = _known_names.find( name ); if (itr.valid()) { deb << "found " << name << std::endl; } else { deb << "adding " << name << "\t\t" << key << "\t\t'" << points << std::endl; double pointsd = atof( points.c_str() ); _known_names.store( name, record( key, pointsd ) ); } } catch (...) { deb << "Couldn't find name" << std::endl; } std::getline(in, line); } deb << "FINISHED importing more KIDs" << std::endl; deb.flush(); } else if (num_commas >= 5) { //update registered keyhoteeIds with public keys sent from web form while( in.good() ) { std::stringstream ss(line); std::string date; std::getline( ss, date, ',' ); std::string email; std::getline( ss, email, ',' ); std::string oname; //keyhoteeId std::getline( ss, oname, ',' ); std::string name; convertToAscii(oname,&name); //boost::to_lower(name); std::string key; //founderCode std::getline( ss, key, ',' ); std::string public_key; std::getline( ss, public_key, ',' ); auto itr = _known_names.find( name ); if (!itr.valid()) { std::string similar_name = name; boost::to_lower(similar_name); itr = _known_names.find( similar_name ); if (!itr.valid()) { boost::to_upper(similar_name); itr = _known_names.find( similar_name ); } } if( itr.valid() ) { auto record_to_update = itr.value(); if (!public_key.empty()) { record_to_update.pub_key = public_key; if (record_to_update.key == key) _known_names.store( name, record_to_update); else deb << "Founder code mismatch for " << name << std::endl; } else { deb << "Public key empty for " << name << std::endl; } } else { deb << "Looking for " << name << " "; std::string similar_name = name; boost::to_lower(similar_name); if (!is_known(_known_names,similar_name)) boost::to_upper(similar_name); if (!is_known(_known_names,similar_name)) deb << "NOT FOUND" << std::endl; deb.flush(); } std::getline(in, line); } } else { std::cerr << "Invalid file format: file should have 3 or 5+ fields, has " << num_commas << std::endl; return 1; } } else //argc != 2 { //configure logger to also write to log file fc::file_appender::config ac; /** \warning Use wstring to construct log file name since %TEMP% can point to path containing native chars. */ ac.filename = "log.txt"; ac.truncate = false; ac.flush = true; fc::logger::get().add_appender( fc::shared_ptr<fc::file_appender>( new fc::file_appender( fc::variant(ac) ) ) ); std::ofstream report_stream("report.txt"); int id_count = 0; int unregistered_count = 0; auto itr = _known_names.begin(); while( itr.valid() ) { auto id_record = itr.value(); //ilog( "${key} => ${value}", ("key",itr.key())("value",id_record)); ilog( "${key}, ${pub_key}, ${p}", ("key",itr.key())("pub_key",id_record.pub_key)("p",id_record.points)); report_stream << itr.key() << "," << id_record.pub_key << std::endl; ++id_count; if (id_record.pub_key.empty()) ++unregistered_count; ++itr; } report_stream.close(); ilog( "Total Id Count: ${id_count} Unregistered: ${unregistered_count}",("id_count",id_count)("unregistered_count",unregistered_count) ); } _tcp_serv.listen( 3879 ); //fc::future<void> _accept_loop_complete = fc::async( [&]() { while( true ) //!_accept_loop_complete.canceled() ) { fc::tcp_socket_ptr sock = std::make_shared<fc::tcp_socket>(); try { _tcp_serv.accept( *sock ); } catch ( const fc::exception& e ) { elog( "fatal: error opening socket for rpc connection: ${e}", ("e", e.to_detail_string() ) ); //exit(1); } auto buf_istream = std::make_shared<fc::buffered_istream>( sock ); auto buf_ostream = std::make_shared<fc::buffered_ostream>( sock ); auto json_con = std::make_shared<fc::rpc::json_connection>( std::move(buf_istream), std::move(buf_ostream) ); json_con->add_method( "register_key", [&]( const fc::variants& params ) -> fc::variant { FC_ASSERT( params.size() == 3 ); auto oname = params[0].as_string(); oname = fc::trim(oname); std::string name; convertToAscii(oname,&name); auto rec = _known_names.fetch( name ); //ensure founder code is correct if( rec.key != params[1].as_string() ) //, "Key ${key} != ${expected}", ("key",params[1])("expected",rec.key) ); { FC_ASSERT( !"Invalid Key" ); } //report if key is already registered, don't allow re-registering if( !(rec.pub_key.size() == 0 || rec.pub_key == params[2].as_string() ) ) { // FC_ASSERT( rec.pub_key.size() == 0 || rec.pub_key == params[2].as_string() ); FC_ASSERT( !"Key already Registered" ); } //register the public key rec.pub_key = params[2].as_string(); _known_names.store( name, rec ); return fc::variant( rec ); }); fc::async( [json_con]{ json_con->exec().wait(); } ); } // } // ); //_accept_loop_complete.wait(); return 0; } catch ( fc::exception& e ) { elog( "${e}", ("e",e.to_detail_string() ) ); } }
void basic_test_1(){ Tank tank1; tank1.siz_x = 1000; tank1.siz_y = 1000; tank1.rep_fields.resize(N_FIELDS); tank1.nutrient_field = Array(tank1.siz_x, tank1.siz_y, 0.00); tank1.sunlight_field = Array(tank1.siz_x, tank1.siz_y, 0.00); tank1.reps = vector<Rep>(); tank1.dt = 1.0; Rep rep1; Field field1; field1.gen_single_point(); rep1.field = field1; rep1.alive = true; rep1.n_replicate = 2; rep1.tank = &tank1; rep1.energy = 0.5; rep1.mass = 2.0; for (int i=0; i<25; i++){ tank1.reps.push_back(rep1); tank1.reps.back().identifier = i; double p = rng() * 0.1; double theta = rng() * 2.0 * PI; tank1.reps.back().p_x = p * sin(theta); tank1.reps.back().p_y = p * cos(theta); tank1.reps.back().x = floor(rng() * tank1.siz_x); tank1.reps.back().y = floor(rng() * tank1.siz_y); tank1.reps.back().energy = rng() *2.0; } int n_reports = 10000; int n_steps = 100000; ofstream report_stream("report.txt"); for (int i=0; i<1000000; i++){ bool report = false; tank1.time_step(); if (i % (n_steps / n_reports) == 0){ report_stream << i << " " << tank1.n_alive << endl; } //Rep r = tank1.reps[15]; //cout << i << " " << r.x << " " << r.y << " " << r.energy << " " << tank1.n_alive << " " << tank1.nutrient_field(floor(r.x), floor(r.y)) << endl; //printVector(tank1.reps_dead); } ofstream final_reps_stream("final_reps.txt"); for (auto & rep : tank1.reps){ for (auto & fp : rep.field.points){ if (rep.alive) final_rep_stream << fp.vals[0] << " " << fp.vals[1] << " " << rep.mass << endl; } } cout << tank1.n_alive << endl; }
/***************************************************************************** Takes as an input a training file of peptide samples. For each fragment type with sufficient count, it creates a dataset and trains the rank model. ******************************************************************************/ void PartitionModel::train_partition_model( PeakRankModel *prank, char *sample_file_path, int _charge, int _size_idx, int _mobility, int frag_idx_to_train, char *report_dir, int max_num_rounds, char *test_set_file, int test_peptide_length, char *stop_signal_file, weight_t max_weight_ratio) { const float min_ratio_detected = 0.15; Config *config = prank->get_config(); const vector<FragmentType>& all_fragments = config->get_all_fragments(); const vector<mass_t>& aa2mass = config->get_aa2mass(); const int num_frags = all_fragments.size(); if (max_num_rounds<0) max_num_rounds = 1000; charge = _charge; size_idx = _size_idx; mobility = _mobility; vector<TrainingPeptide> sample_tps, test_tps; vector<int> frag_counts; vector<int> frag_detected; vector<int> length_counts; read_training_peptides_from_file(sample_file_path,sample_tps); cout << "Read " << sample_tps.size() << " training tps..."; int num_tps_to_add = 0; if (prank->get_feature_set_type() == 2) { if (sample_tps.size()<25000) num_tps_to_add = 1; if (sample_tps.size()<15000) num_tps_to_add = 2; if (sample_tps.size()<10000) num_tps_to_add = 3; if (sample_tps.size()<5000) num_tps_to_add = 4; cout << "Adding at most " << num_tps_to_add << " per tp." << endl; } if (prank->get_feature_set_type() == 2) convert_tps_to_partial_denovo(config,sample_tps,num_tps_to_add); if (test_set_file) { read_training_peptides_from_file(test_set_file, test_tps); cout << "Read " << test_tps.size() << " test tps..."; if (prank->get_feature_set_type() == 2) convert_tps_to_partial_denovo(config,test_tps,num_tps_to_add); } // Create intial report on dataset frag_counts.resize(num_frags,0); frag_detected.resize(num_frags,0); length_counts.resize(200,0); int numH=0,numK=0, numR=0; int i; for (i=0; i<sample_tps.size(); i++) { const TrainingPeptide& tp = sample_tps[i]; int f; for (f=0; f<tp.intens.size(); f++) { int j; for (j=1; j<tp.intens[f].size(); j++) { if (tp.intens[f][j]>=0) frag_counts[tp.frag_idxs[f]]++; if (tp.intens[f][j]>0) frag_detected[tp.frag_idxs[f]]++; } } int j; for (j=0; j<tp.amino_acids.size(); j++) { if (tp.amino_acids[j]==His) numH++; if (tp.amino_acids[j]==Lys) numK++; if (tp.amino_acids[j]==Arg) numR++; } length_counts[tp.amino_acids.size()]++; } // report and select fragments for training cout << "# training peptides: " << sample_tps.size() << endl; cout << "Avg #R: " << numR/(double)sample_tps.size() << endl; cout << "Avg #K: " << numK/(double)sample_tps.size() << endl; cout << "Avg #H: " << numH/(double)sample_tps.size() << endl; cout << endl << "Sample lengths:" << endl; for (i=0; i<length_counts.size(); i++) if (length_counts[i]>0) cout << i << "\t" << length_counts[i] << endl; cout << endl; for (i=0; i<all_fragments.size(); i++) { float ratio = (float)frag_detected[i]/frag_counts[i]; cout << all_fragments[i].label << "\t" << frag_detected[i] << " / " << frag_counts[i] << "\t = " << setprecision(3) << ratio << endl; if (ratio>=min_ratio_detected) { if (frag_idx_to_train<0 || frag_idx_to_train == i) fragment_type_idxs.push_back(i); } } cout << endl; cout << "Max weight ratio: " << max_weight_ratio << endl; if (fragment_type_idxs.size() == 0) { cout << "No models to train!" << endl; return; } // Train each selected model frag_models.resize(fragment_type_idxs.size()); int f; for (f=0; f<fragment_type_idxs.size(); f++) { const int frag_idx = fragment_type_idxs[f]; if (frag_idx>0 && frag_idx != frag_idx_to_train) continue; const int frag_charge = all_fragments[frag_idx].charge; cout << "Training frag " << frag_idx << " (" << config->get_fragment(frag_idx).label <<")" << endl; // fill RankBoostSamples and create rank ds RankBoostDataset rank_ds,test_ds; vector<float> peak_intens; vector<PeakStart> peak_starts; vector<float> max_annotated_intens; // Train RankBoostModel cout << "TRAINING..." << endl; // initialize and read the test set if it exists RankBoostDataset *test_set_ptr=NULL; if (test_set_file) { vector<float> test_peak_intens; vector<PeakStart> test_peak_starts; vector<float> test_max_annotated_intens; cout << "Reading test tps..." << endl; prank->read_training_peptides_into_rank_boost_dataset(frag_idx, charge, test_tps, test_ds, test_peak_intens, test_peak_starts, test_max_annotated_intens); cout << "Creating test phi list..." << endl; create_phi_list_from_samples(test_peak_intens, test_peak_starts, test_max_annotated_intens, test_ds.get_non_const_phi_support()); test_ds.compute_total_phi_weight(); test_set_ptr = &test_ds; // choose length (try shorte peptide if not eonough samples, go for the max) if (test_peptide_length == 0) { vector<int> test_length_counts; test_length_counts.resize(200,0); const vector<RankBoostSample>& samples = test_ds.get_samples(); vector<int> sizes; sizes.resize(test_ds.get_num_groups(),0); int i; for (i=0; i<samples.size(); i++) sizes[samples[i].group_idx]=samples[i].tag3; for (i=1; i<sizes.size(); i++) test_length_counts[sizes[i]]++; int max=0; for (i=0; i<200; i++) { if (test_length_counts[i]>=500) break; if (test_length_counts[i]>test_length_counts[max]) max=i; } if (i<200) { test_peptide_length = i; } else test_peptide_length = max; } cout << "test length " << test_peptide_length << endl; } cout << "read training tps..." << endl; prank->read_training_peptides_into_rank_boost_dataset(frag_idx, charge, sample_tps, rank_ds, peak_intens, peak_starts, max_annotated_intens); RankBoostModel& boost = frag_models[f]; boost.init_rankboost_model_feature_names(prank->get_binary_names(),prank->get_real_names()); cout << "create training phi list..." << endl; create_phi_list_from_samples(peak_intens,peak_starts, max_annotated_intens, rank_ds.get_non_const_phi_support()); cout << "initializing boost..." << endl; rank_ds.compute_total_phi_weight(); rank_ds.initialize_potenital_lists(); rank_ds.initialize_binary_one_lists(prank->get_binary_names().size()); rank_ds.initialize_binary_ordered_phi_lists(boost.get_ptr_to_binary_feature_names()); rank_ds.initialzie_real_feature_table(prank->get_real_names().size()); rank_ds.set_max_ratio_for_regular_update(max_weight_ratio); boost.init_rankboost_model_for_training(rank_ds,40,100); rank_ds.initialize_real_vote_lists(boost); // boost.summarize_features(rank_ds.get_samples()); char report_prefix[512]; if (report_dir) sprintf(report_prefix,"%s/%s_%d",report_dir, partition_name.c_str(),frag_idx); vector<idx_weight_pair> miss_pairs; boost.train_rankboost_model(rank_ds, max_num_rounds, &miss_pairs, test_set_ptr, test_peptide_length, report_prefix, stop_signal_file); // final report if (report_dir) { char name_buff[512]; sprintf(name_buff,"%s_train_miss_pairs.txt",report_prefix); ofstream report_stream(name_buff); if (! report_stream.is_open() || ! report_stream.good()) { cout << "Error: couldn't open pairs report file for writing:" << name_buff << endl; exit(1); } simple_print_peak_pairs(miss_pairs, sample_tps, rank_ds, prank, frag_idx, 250, report_stream); report_stream.close(); sprintf(name_buff,"%s_feature_list.txt",report_prefix); ofstream feature_stream(name_buff); if (! feature_stream.is_open() || ! feature_stream.good()) { cout << "Error: couldn't feature_stream file for writing:" << name_buff << endl; exit(1); } cout << "[..."; boost.ouput_importance_ranked_feature_list(rank_ds,feature_stream); cout << " ...]" << endl; feature_stream.close(); // write model (also compresses features and deletes the default values) sprintf(name_buff,"%s_model.txt",report_prefix); ofstream model_stream(name_buff); boost.write_rankboost_model(model_stream,true); model_stream.close(); } else // send to cout simple_print_peak_pairs(miss_pairs, sample_tps, rank_ds, prank, frag_idx, 100); } }