Пример #1
0
void MFindDialog::FindAll(
	const string&	inWhat,
	bool			inIgnoreCase,
	bool			inRegex,
	MMultiMethod	inMethod,
	fs::path			inDirectory,
	bool			inRecursive,
	bool			inTextFilesOnly,
	const string&	inFileNameFilter)
{
	mStopFindAll = false;
	
	try
	{
		FileSet files;
		auto_ptr<MMessageList> list(new MMessageList);
		
		GetFilesForFindAll(inMethod, inDirectory,
			inRecursive, inTextFilesOnly, inFileNameFilter, files);
		
		for (FileSet::iterator file = files.begin(); file != files.end(); ++file)
		{
			if (mStopFindAll)
				break;
			
			SetStatusString(file->string());
			MUrl url(*file);
			
			bool searched = false;
			
			gdk_threads_enter();
			MTextDocument* doc = dynamic_cast<MTextDocument*>(MDocument::GetDocumentForURL(url));

			if (doc != nil)
			{
				doc->FindAll(inWhat, inIgnoreCase, inRegex, false, *list.get());
				searched = true;
			}
			gdk_threads_leave();
			
			if (not searched)
				MTextDocument::FindAll(*file, inWhat, inIgnoreCase, inRegex, false, *list.get());
		}
		
		mFindAllResult = list.release();
	}
	catch (exception& e)
	{
		mFindAllResult = new MMessageList;	// flag failure... sucks.. I know
		mFindAllResult->AddMessage(kMsgKindError, fs::path(), 0, 0, 0, "Error in find all, sorry");
		mFindAllResult->AddMessage(kMsgKindError, fs::path(), 0, 0, 0, e.what());
	}	
	catch (...)
	{
		mFindAllResult = new MMessageList;	// flag failure... sucks.. I know
		mFindAllResult->AddMessage(kMsgKindError, fs::path(), 0, 0, 0, "Error in find all, sorry");
	}	
}
Пример #2
0
bool
PathHandler::_HasFile(const node_ref& nodeRef) const
{
	FileEntry setEntry;
	setEntry.ref.device = nodeRef.device;
	setEntry.node = nodeRef.node;
		// name does not need to be set, since it's not used for comparing
	FileSet::const_iterator iterator = fFiles.find(setEntry);
	return iterator != fFiles.end();
}
Пример #3
0
// Builds a FileSet from the manifests found directly inside `path`: every
// entry that is not a directory and whose stem equals "MANIFEST" is read
// with read_file() and its contents are handed to ParseAndAddEntries().
FileSet FileSet::FromManifestsInDir(const boost::filesystem::path &path) {
    FileSet manifests;

    const directory_iterator last;
    directory_iterator entry(path);
    while (entry != last) {
        const bool isManifestFile =
            !is_directory(entry->status()) && entry->path().stem() == "MANIFEST";
        if (isManifestFile) {
            manifests.ParseAndAddEntries(read_file(entry->path()));
        }
        ++entry;
    }

    return manifests;
}
// Collects the entries of the directory inPath into outFileSet.
//
//  - Backslashes in inPath are replaced by DEF_DirectorySeparator and one
//    trailing separator is removed before the directory is opened.
//  - Entries whose name starts with a dot are skipped.
//  - An entry counts as a directory when opendir() succeeds on it.
//  - inGetDir == true : only directories are inserted; files are skipped.
//    inGetDir == false: only non-directories are inserted.
//  - inMsk, when not NULL, filters entry names through PatternMatch()
//    (inIsCase is forwarded to it); a NULL mask accepts every name.
//  - inRecurs == true descends into every sub directory found.
//
// Inserted values are "<path><separator><entry name>". Paths that do not fit
// into MAX_PATH characters are reported and skipped instead of overflowing
// the local buffers. The function returns silently when inPath is NULL or
// empty, or when the directory cannot be opened.
void FileSystem::GetDirectoryFiles(FileSet &outFileSet, const char *inPath,
                const char *inMsk, bool inIsCase /*= true*/, bool inRecurs /*= false*/,
                bool inGetDir /*= false*/)
{
  // Quit if there is an incorrect path
  if (!inPath || *inPath == 0)
    return;

  // Reject paths that would not fit into the local buffer
  char path[MAX_PATH + 1];
  if (strlen(inPath) >= sizeof(path)) {
    gLog.log(eTypLogError, "Err > Directory path too long");
    return;
  }

  // Replace the directory separator
  char *s;
  strcpy(path, inPath);
  while ((s = strchr(path, '\\')) != NULL)
    *s = DEF_DirectorySeparator;
  // Clear the last directory separator if it exists
  // NOTE(review): a path made of a single separator becomes empty here, so
  // the opendir() below is then called with "" - confirm this is intended.
  s = path + strlen(path) - 1;
  if (*s == DEF_DirectorySeparator)
    *s = 0;

  DIR *dir;
  struct dirent *ent;

  // Open the directory
  if ((dir = opendir(path)) == NULL)
    return;

  // Get directory content
  char buf[MAX_PATH + 1];
  rewinddir(dir);
  while ((ent = readdir(dir)) != NULL) {
    // Clear names begining with a dot (hidden files, current and parent directory)
    if (*(ent->d_name) == '.')
      continue;

    // Build the full entry path; skip it when it would be truncated
    const int written = snprintf(buf, sizeof(buf), "%s%c%s", path,
                                 DEF_DirectorySeparator, ent->d_name);
    if (written < 0 || static_cast<size_t>(written) >= sizeof(buf)) {
      gLog.log(eTypLogError, "Err > Directory entry path too long, entry skipped");
      continue;
    }

    // Get sub directories or recurse or clear them
    DIR *nextdir = opendir(buf);
    if (nextdir) {
      closedir(nextdir);
      if (inGetDir) {
        if (!inMsk || PatternMatch(ent->d_name, inMsk, inIsCase))
          outFileSet.insert(buf);
      }
      // Recursive call ?
      if (inRecurs)
        GetDirectoryFiles(outFileSet, buf, inMsk, inIsCase, inRecurs, inGetDir);
      continue;
    }
    if (inGetDir)
      continue;
    if (!inMsk || PatternMatch(ent->d_name, inMsk, inIsCase))
      outFileSet.insert(buf);
  }
  if (closedir(dir) != 0)
    gLog.log(eTypLogError, "Err > Unable to close directory: %s", inPath);
}
Пример #5
0
// Returns a FileSet holding every entry of this set for which
// other.HasMatchingEntry() is false.
FileSet FileSet::MinusExactMatches(const FileSet &other) const
{
    FileSet remaining;

    for (FileSet::EntrySet::const_iterator entry = Entries().begin();
         entry != Entries().end(); ++entry)
    {
        if (other.HasMatchingEntry(*entry))
            continue;

        remaining.AddEntry(*entry);
    }

    return remaining;
}
Пример #6
0
// Returns the names reported by File::list() for m_sFsPath as a FileSet.
// m_lock is held for the whole call.
FileSystem::FileSet BlockFileSystem::listFiles()
{
    ScopedLock<FastMutex> lock(m_lock);

    FX_NS(utility)::File fsDir(m_sFsPath);
    vector<tstring> names;
    fsDir.list(names, true);

    // Copy the listed names into the result set.
    FileSet result;
    vector<tstring>::const_iterator name = names.begin();
    while (name != names.end())
    {
        result.insert(*name);
        ++name;
    }
    return result;
}
Пример #7
0
// Stops watching the node described by nodeRef and removes its entry from
// fFiles. Returns B_ENTRY_NOT_FOUND when fFiles has no such entry, B_OK
// otherwise.
status_t
PathHandler::_RemoveFile(const node_ref& nodeRef)
{
	TRACE("  REMOVE FILE %ld:%Ld\n", nodeRef.device, nodeRef.node);

	// Lookup key: the name is not used for comparing, so it is left unset.
	FileEntry key;
	key.node = nodeRef.node;
	key.ref.device = nodeRef.device;

	FileSet::iterator found = fFiles.find(key);
	if (found != fFiles.end()) {
		watch_node(&nodeRef, B_STOP_WATCHING, this);
		fFiles.erase(found);
		return B_OK;
	}

	return B_ENTRY_NOT_FOUND;
}
Пример #8
0
void
PathHandler::Dump()
{
	TRACE("WATCHING DIRECTORIES:\n");
	DirectorySet::iterator i = fDirectories.begin();
	for (; i != fDirectories.end(); i++) {
		TRACE("  %ld:%Ld (%s)\n", i->node.device, i->node.node, i->contained
			? "contained" : "-");
	}

	TRACE("WATCHING FILES:\n");

	FileSet::iterator j = fFiles.begin();
	for (; j != fFiles.end(); j++) {
		TRACE("  %ld:%Ld\n", j->ref.device, j->node);
	}
}
Пример #9
0
// Sends a copy of message to fTarget with `what` set to B_PATH_MONITOR.
//
// When nodeRef is found in fDirectories, or otherwise in fFiles, its path is
// added as "path". The copy always receives fPath as "watched_path".
// Nothing is sent when the node is a known directory while
// _WatchFilesOnly() is true, or is not a known directory while
// _WatchFoldersOnly() is true.
void
PathHandler::_NotifyTarget(BMessage* message, const node_ref& nodeRef) const
{
	BMessage update(*message);
	update.what = B_PATH_MONITOR;

	TRACE("_NotifyTarget(): node ref %ld.%Ld\n", nodeRef.device, nodeRef.node);

	WatchedDirectory lookupDirectory;
	lookupDirectory.node = nodeRef;

	const bool isKnownDirectory
		= fDirectories.find(lookupDirectory) != fDirectories.end();

	if (isKnownDirectory) {
		// stat or attr notification for a directory
		if (_WatchFilesOnly())
			return;

		BDirectory nodeDirectory(&nodeRef);
		BEntry directoryEntry;
		if (nodeDirectory.GetEntry(&directoryEntry) == B_OK) {
			BPath directoryPath(&directoryEntry);
			update.AddString("path", directoryPath.Path());
		}
	} else {
		// this is bound to be a notification for a file
		if (_WatchFoldersOnly())
			return;

		// Lookup key: the name is not used for comparing, so it stays unset.
		FileEntry key;
		key.node = nodeRef.node;
		key.ref.device = nodeRef.device;

		FileSet::const_iterator knownFile = fFiles.find(key);
		if (knownFile != fFiles.end()) {
			BPath filePath(&(knownFile->ref));
			update.AddString("path", filePath.Path());
		}
	}

	// Lets the target figure out which BPathMonitor::StartWatching() call
	// this message results from.
	update.AddString("watched_path", fPath.Path());

	fTarget.SendMessage(&update);
}
Пример #10
0
  void LocalProcess::directoryChanged(const QString &str)
  {
    FileSet fs = dirFiles(openstudio::toQString(m_outdir));

    std::vector<FileSet::value_type> diff;

    {
      QMutexLocker l(&m_mutex);
      std::set_symmetric_difference(fs.begin(), fs.end(), 
          m_outfiles.begin(), m_outfiles.end(),
          std::back_inserter(diff));

      m_outfiles = fs;
    }

    std::for_each(diff.begin(), diff.end(), boost::bind(&LocalProcess::emitUpdatedFileInfo, this, _1));

    m_process.checkProcessStatus();
  }
Пример #11
0
// Starts watching the file behind entry and records it in fFiles.
//
// Returns B_OK without doing anything when fFlags requests no node watching
// besides B_WATCH_DIRECTORY, or when the file is already known. Otherwise
// the node is watched with the node flags from fFlags and an entry holding
// its entry_ref and node ID is inserted into fFiles.
//
// When notify is true and _WatchFilesOnly() holds, a B_ENTRY_CREATED style
// message is sent through _NotifyTarget().
//
// Returns the error of GetNodeRef(), watch_node() or GetRef() on failure;
// if GetRef() fails the watch that was just started is stopped again so no
// half-initialized entry is left behind.
status_t
PathHandler::_AddFile(BEntry& entry, bool notify)
{
	if ((fFlags & (WATCH_NODE_FLAG_MASK & ~B_WATCH_DIRECTORY)) == 0)
		return B_OK;

#ifdef TRACE_PATH_MONITOR
{
	BPath path(&entry);
	TRACE("  ADD FILE %s\n", path.Path());
}
#endif

	node_ref nodeRef;
	status_t status = entry.GetNodeRef(&nodeRef);
	if (status != B_OK)
		return status;

	// check if we already know this file

	// TODO: It should be possible to omit this check if we know it
	// can't be the case (for example when adding subfolders recursively,
	// although in that case, the API user may still have added this file
	// independently, so for now, it should be the safest to perform this
	// check in all cases.)
	if (_HasFile(nodeRef))
		return B_OK;

	status = watch_node(&nodeRef, (fFlags & WATCH_NODE_FLAG_MASK), this);
	if (status != B_OK)
		return status;

	FileEntry setEntry;
	status = entry.GetRef(&setEntry.ref);
	if (status != B_OK) {
		// Without a valid entry_ref the file cannot be tracked in fFiles;
		// undo the watch started above and report the failure.
		watch_node(&nodeRef, B_STOP_WATCHING, this);
		return status;
	}
	setEntry.node = nodeRef.node;

	fFiles.insert(setEntry);

	if (notify && _WatchFilesOnly()) {
		// We also notify our target about new files if it's only interested
		// in files; it won't be notified about new directories, so it cannot
		// know when to search for them.
		BMessage update;
		update.AddInt32("opcode", B_ENTRY_CREATED);
		update.AddInt32("device", nodeRef.device);
		update.AddInt64("directory", setEntry.ref.directory);
		update.AddString("name", setEntry.ref.name);
		update.AddBool("added", true);

		_NotifyTarget(&update, nodeRef);
	}

	return B_OK;
}
Пример #12
0
void create_MSB_query_for_file_list(const FileManager& fm, 
									AllScoreModels *model, 
									int max_query_size)
{
	Config *config = model->get_config();
	FileSet		fs;

	fs.select_all_files(fm);

	while (1)
	{
		Spectrum spec;
		PrmGraph prm;
		SingleSpectrumFile *ssf;
		vector<MSBSequence> msb_sequences;

		if (! fs.get_next_spectrum(fm,config,&spec,&ssf)) 
			break;

		
		model->init_model_for_scoring_spectrum(&spec);
		/*
		AllScoreModels *_model, 
										  Spectrum *spectrum,
										  mass_t _pm_with_19, 
										  int spec_charge, 
										  bool add_all_pepitde_nodes, 
										  bool only_basic_score*/
		prm.create_graph_from_spectrum(model, &spec, spec.get_org_pm_with_19());
		model->score_graph_edges(prm);

		spec.print_expected_by();
		prm.print_with_multi_edges();

//\\	generate_MSB_sequences_from_PrmGraph(prm,msb_sequences,10);
		
		exit(0);
		
	}

}
Пример #13
0
// Entry point of "DMA recon": prints a banner, lets the PathCrawler list
// crawler.startPath into fileSet, prints the summary and waits via
// system("pause"). Decrypted entries are skipped when the first
// command-line argument starts with '1'.
int main (int argc, char *argv[])
{
    FileSet fileSet;
    PathCrawler crawler(&fileSet);

    printf("---------------------------\n");
    printf("DMA recon v1.4\n");
    printf("---------------------------\n");

    const bool skip_decrypted = (argc >= 2) && (argv[1][0] == '1');
    if (skip_decrypted)
        printf("Skipping decrypted!\n");

    crawler.listDir(crawler.startPath, skip_decrypted);
    printf("listing finished...\n");
    fileSet.printSummary();

    system("pause");
    return 0;
}
Пример #14
0
  /// Lists the files in \p dir and in each of its "mergedjob-*"
  /// subdirectories, drops every file whose name equals the file name of the
  /// second path of an m_requiredFiles pair, and returns the remainder
  /// converted through RunManager_Util::dirFile.
  /// An openstudio::Exception raised during the conversion is logged and
  /// rethrown via LOG_AND_THROW.
  LocalProcess::FileSet LocalProcess::dirFiles(const QString &dir) const
  {
    typedef std::vector<std::pair<openstudio::path, openstudio::path> > RequiredFiles;
    typedef FileInfo (*filetransform)(QFileInfo);

    QFileInfoList candidates;

    // Files inside the "mergedjob-*" subdirectories are collected first...
    QDir mergedJobFilter(dir, "mergedjob-*", QDir::Name, QDir::Dirs);
    const QFileInfoList mergedJobDirs = mergedJobFilter.entryInfoList();
    for (QFileInfoList::const_iterator jobDir = mergedJobDirs.begin();
         jobDir != mergedJobDirs.end();
         ++jobDir)
    {
      QDir jobFiles(jobDir->absoluteFilePath(), "", QDir::Name, QDir::Files);
      candidates.append(jobFiles.entryInfoList());
    }

    // ...followed by the files directly inside dir.
    QDir topLevel(dir, "", QDir::Name, QDir::Files);
    candidates.append(topLevel.entryInfoList());

    // Filter out all files that are part of the set of input files.
    // Everything remaining should be an output file.
    QFileInfoList outputs;
    for (QFileInfoList::const_iterator candidate = candidates.begin();
         candidate != candidates.end();
         ++candidate)
    {
      const QString candidateName = candidate->fileName();

      RequiredFiles::const_iterator required = m_requiredFiles.begin();
      while (required != m_requiredFiles.end()
          && !(candidateName == toQString(required->second.filename())))
      {
        ++required;
      }

      // Reaching the end means no required file carries this name.
      if (required == m_requiredFiles.end())
      {
        outputs.push_back(*candidate);
      }
    }

    FileSet result;

    try{
      std::transform(outputs.begin(), outputs.end(), std::inserter(result, result.end()),
          static_cast<filetransform>(&RunManager_Util::dirFile));
    } catch(openstudio::Exception& e) {
      LOG_AND_THROW("Exception caught " << e.what());
    }

    return result;
  }
Пример #15
0
int main()
{
	Config *config=NULL;  
	FileManager fm;
	FileSet fs;
	AdvancedScoreModel model; 
	EdgeModel edge_model;
	PeptideRankScorer drs;
	 
	

	rand_seed(112233);


//	train_all(); 

	model.read_model("CID_IT_TRYP");
	config = model.get_config(); 
	config->apply_selected_PTMs("C+57:M+16:Q-17");

//	fm.init_from_file(config,"C:\\Work\\msms5\\DnvScore\\all_ds\\HEK_98_3_unique_30.mgf");
	fs.select_files_in_mz_range(fm,300,2000,3);
	fs.randomly_reduce_ssfs(100);
	
	vector<int> cc(4,0);
	cc[3]=100;
	fs.create_mgf_file(fm,config,"HEK_4_30e.mgf",cc);
	exit(0);

//	model.get_config()->apply_selected_PTMs("C+57:M+16:Q-17"); 


	PMCSQS_Scorer *pmcsqs = (PMCSQS_Scorer *)model.get_pmcsqs_ptr();

	fm.init_from_list_file(config,"C:\\Work\\msms5\\PepNovoHQ\\train3.txt");

	pmcsqs->benchmark_pm_selection(config,fm,0.3);

	exit(0);

/*	benchmark_shew(model,"C:\\Work\\msms5\\PepNovoHQ\\Shew_test_10.mgf");
	exit(0);

		drs.set_model_type(0);
	drs.read_denovo_rank_scorer_model("C:\\Work\\msms5\\PepNovoHQ\\Models\\DBSCORE\\DBSCORE_rank_model.txt");
	drs.give_de_novo_and_peak_match_examples("C:\\Work\\msms5\\DnvScore\\all_db_hits",
									 "C:\\Work\\msms5\\DnvScore\\seq_freqs\\sequences",
									 "C:\\Work\\msms5\\DnvScore\\dnv_full_parts",
									 "C:\\Work\\msms5\\DnvScore\\dicty2_all.txt",
									 2,2);
	exit(0);
	
	drs.read_denovo_rank_scorer_model("C:\\Work\\msms5\\PepNovoHQ\\Models\\DBSCORE\\DBSCORE_rank_model.txt");
	drs.rescore_inspect_results("C:\\Work\\msms5\\DnvScore\\inspect_res\\H293-40ul-08.mzXML",
								"C:\\Work\\msms5\\DnvScore\\inspect_res\\H293-40ul-08.txt",
								"C:\\Work\\msms5\\DnvScore\\inspect_res\\H293-40ul-08_new.txt");

	exit(0);

//	test_denovo_integrity(model,"C:\\Work\\msms5\\DnvScore\\all_ds\\Dicty_98_2_unique_8.mgf", 20000, 8); 

//	benchmark_ranking_on_denovo("C:\\Work\\msms5\\PepNovoHQ\\Models\\DNVSCORE5\\DNVSCORE5_rank_model.txt",
//		"C:\\Work\\msms5\\DnvScore\\test\\DNVSCORE4_test_10.mgf",400,10); // 

//	benchmark_ranking_on_full_denovo("C:\\Work\\msms5\\PepNovoHQ\\Models\\DNVFULL\\DNVFULL_rank_model.txt",
//		"C:\\Work\\msms5\\DnvScore\\test\\FULL_test_10.mgf",1000);
//	exit(0); 



	fm.init_from_list_file(config,//"C:\\Work\\msms5\\DnvScore\\short2_train_mgf_list.txt");
	 "C:\\Work\\msms5\\DnvScore\\comp2_train_mgf_list.txt");
	// "C:\\Work\\msms5\\NewScore\\lists\\Shew_98_3_unique_mgf_list.txt");
	fs.select_files(fm,0,2500,-1,-1,2);
	
	find_special_PTM_frags_using_offset_counts("S",fm,fs.get_ssf_pointers(),&model,2);

	exit(0); 
	
	drs.read_denovo_rank_scorer_model("C:\\Work\\msms5\\PepNovoHQ\\Models\\DNVSC_RANK\\LTQ_DNVRANK_model.txt");
	drs.test_model("C:\\Work\\msms5\\DnvScore\\test_sets\\LTQ_DNVRANK_test_10.mgf",2000);

	drs.train_denovo_partition_model("C:\\Work\\msms5\\DnvScore\\all_db_hits",
									 "C:\\Work\\msms5\\DnvScore\\seq_freqs\\sequences",
									 "C:\\Work\\msms5\\DnvScore\\comp_all_parts",
								//	 "C:\\Work\\msms5\\DnvScore\\short2_train_mgf_list.txt",
									 "C:\\Work\\msms5\\DnvScore\\comp2_train_mgf_list.txt",
									 2,
									 1,
									 30000,
									 5); 




//	model.read_model("ETD");
//	config = model.get_config();
//	config->apply_selected_PTMs("M+16:Q-17:N+1:C+57");

//	fm.init_from_list_file(config,"C:\\Work\\msms5\\PepNovoHQ\\ETD2\\ETD_unique_train.txt");
//	model.full_train_model("ETD",fm,0.5);
	
//	model.train_pmc_rank_models("C:\\Work\\msms5\\PepNovoHQ\\ETD2\\ETD_all_train.txt");
//	model.write_model();
	exit(0); 

//	train_all();

//	create_training_sets();
//	exit(0);
//	generate_size_reports();
//	test_sims();
//	data_set_stats();

//	convert_list_to_trianing_peptide_file(
//		"C:\\Work\\msms5\\NewScore\\lists\\Dicty_98_3_unique_mgf_list.txt",
//		"C:\\Work\\msms5\\NewScore\\tps\\Dicty_98_3_unique_tps.txt");

//	proline_cleavage_reports("b",2); 
//	exit(0);
//	center_cleavage_reports("y",3);
//	n_terminal_cleavage_reports("y",2);
//	c_terminal_cleavage_reports("y",-2);


//	find_best_similar_pairs("LTQ_LOW_TRYP",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\HEK_pos2.mgf",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\Shew_pos2.mgf",8);

//	find_self_similarity("LTQ_LOW_TRYP",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\HEK_pos2.mgf",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\Shew_pos2.mgf");

//	find_similar_pairs_ditrib("LTQ_LOW_TRYP",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\HEK_pos2.mgf",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\Shew_pos2.mgf");

//	find_homeometric_similarity_distrib("LTQ_LOW_TRYP",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\HEK_pos2.mgf",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\Shew_pos2.mgf");

//	find_self_similarity_ranges("LTQ_LOW_TRYP",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\SHEW18.mgf");

//	peptide_distances();

//	find_matches_similarity_distrib("LTQ_LOW_TRYP",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\HEK_pos2.mgf",
//		"C:\\Work\\msms5\\PepNovoHQ\\pairs\\Shew_pos2.mgf");

//	match_sim_exp();  

//	exit(0);

//	edge_model.train_all_edge_models("C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt","LTQ",2); 
//	saa.train_saa_models("C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt","LTQ",2); 
//	saa.train_saancd_models("C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt","LTQ",2); 
//	daa.train_daa_models("C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt","LTQ",2,0.25); 
//	daa.train_daancd_model("C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt","LTQ",2); 

	
//	dot_prod_exp();   
//	qc_exp();  
//	qc_ann_exp("64068",true);
//	exit(0); 

	config = model.get_config();
	config->init_with_defaults();
	config->apply_selected_PTMs("M+16:Q-17:N+1:C+57");

	fm.init_from_file(config,"C:\\Work\\msms5\\PepNovoHQ\\ETD\\train_etd.mgf");
	model.full_train_model("ETD",fm,0.5);
	exit(0); 

	if (1)
	{
		model.read_model("LTQ_LOW_TRYP");

	//	model.get_config()->apply_selected_PTMs("C+57:M+16");
	//	model.get_config()->init_with_defaults();
		model.get_config()->apply_selected_PTMs("M+16:C+57:Q-17");

	//	model.test_pmc("C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\sqs_train_1.mgf",1);
	//	model.compute_sqs_cum_stats_for_ided("C:\\Work\\msms5\\NewScore\\lists\\all_HEK_mgf_list.txt");
	//	model.compute_sqs_cum_stats_for_crap("C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\crap_list.txt");
	//	model.write_model();

		model.compute_sqs_cum_stats_for_ided("C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\H40good\\H40good_mgf_list.txt");

	///	model.benchmark_sqs("C:\\Work\\msms5\\PepNovoHQ\\small_list.txt",
	//						"C:\\Work\\msms5\\PepNovoHQ\\small_anns.txt");

	//	model.benchmark_sqs("C:\\Work\\msms5\\PepNovoHQ\\tmp\\H40ul_0_list.txt",
	//						"C:\\Work\\msms5\\PepNovoHQ\\H40ul55_missed.txt");
	//						"C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\H40ul98_anns.txt");
	exit(0);


	DAT_Converter dat;
	dat.create_dat_files_for_anns(model.get_config(),
								  "C:\\Work\\Data\\Briggs\\HEK293\\40ul_list.txt",
								//  "C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\H40ul98_anns.txt",
								  "C:\\Work\\msms5\\PepNovoHQ\\H40ul55_missed.txt",
								  "C:\\Work\\msms5\\PepNovoHQ\\tmp\\",
								  "H4055");
//	
		
	//	model.train_pmc_rank_models( 
		//	"C:\\Work\\msms5\\NewScore\\lists\\HEK_98_1_unique_mgf_list.txt",0);
	//		"C:\\Work\\msms5\\NewScore\\lists\\all_unique_mgf_list.txt",0);
	//		"C:\\Work\\msms5\\NewScore\\lists\\all_HEK_mgf_list.txt",0);

	//	model.write_model();
	//	make_before_and_after_matrices(model.get_config(),"C:\\Work\\msms5\\lists\\mgf10.txt",3,"y");
		exit(0);

		FileManager fm;
		fm.init_from_list_file(model.get_config(),"C:\\Work\\msms5\\lists\\LTQ_train_list.txt");

		model.full_train_model("LTQ_IT_TRYP",fm,0.45);

		model.write_model(); 

	//	model.train_pmc("C:\\Work\\msms5\\lists\\pos_sqs_list.txt");

		vector< vector<float> > weights;
		weights.resize(4);
		weights[1].resize(3,0);
		weights[2].resize(3,0);
		weights[3].resize(3,0);
		weights[1][0] = 0.1; weights[1][1] = 0.1;  weights[1][2] = 0.4;
		weights[2][0] = 0.6; weights[2][1] = 0.75; weights[2][2] = 0.5;
		weights[3][0] = 0.3; weights[3][1] = 0.15; weights[3][2] = 0.1;
	
	//	model.train_sqs("C:\\Work\\msms5\\lists\\pos_sqs_list.txt",
	//					"C:\\Work\\msms5\\lists\\neg_sqs_list.txt",&weights);

	//	model.train_sqs("C:\\Work\\msms5\\NewScore\\lists\\all_unique_mgf_list.txt",
	//					"C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\crap_list.txt",&weights);
	//	config = model.get_config();

	//	config->set_tolerance(0.5);
//
	//	find_pair_similarities(config,"C:/Work/clust_exp/Results/Shew_bm/ShewBM40_0_1.mgf",
	//		"C:/Work/clust_exp/Results/Shew_bm/ShewBM40_pairs.txt");

		exit(0);
	}

	//	make_y_vectors("C:\\Work\\msms5\\PepNovoHQ\\pmcsqs\\sqs10_train_2.mgf",&model);
	//	create_training_files(config);
	//	exit(0);

	if (0)
	{
		PMCSQS_Scorer sqs;


	//	exit(0); 



	//	create_training_files(config);
		exit(0);
	}

	if (1)
	{
	//	fm.init_from_file(model.get_config(),"C:\\Work\\msms5\\PepNovoHQ\\orbi_ann.mgf");
	//	create_MSB_query_for_file_list(fm,&model);

		vector< vector<int> >    annotation_idxs;
		vector<mzXML_annotation> annotations;
		read_mzXML_annotations("C:/Work/Data/Briggs/HEK293_mzxml_list.txt", 
					"C:/Work/ClusterAnn/mzxml_anns3.txt", annotation_idxs, annotations, 35000);

	//	read_mzXML_annotations("C:/Work/ClusterAnn/H40ul_mgf_list.txt", 
	//				"C:/Work/ClusterAnn/mgf_anns.txt", annotation_idxs, annotations, 35000);


		cout << "Read annotations: " << annotations.size() << endl;

		fm.init_from_list_file_and_add_annotations(config,"C:/Work/Data/Briggs/HEK293_mzxml_list.txt",
			annotation_idxs, annotations,true);

	//	fm.init_from_list_file_and_add_annotations(config,"C:/Work/ClusterAnn/H40ul_mgf_list.txt",annotation_idxs,
	//		annotations,true);


		FileSet all_spec_fs;
		all_spec_fs.select_all_files(fm,true);

	//	config->set_need_to_normalize(0);
	//	all_spec_fs.create_MGF_file(fm,config,"C:/Work/ClusterAnn/mgf_spectra.mgf");
	//	exit(0);

		ofstream mgf_stream("C:/Work/ClusterAnn/mzxml_spectra3.mgf",ios::out);
		BasicSpecReader bsr;
		const vector<SingleSpectrumFile *>& ssfs = all_spec_fs.get_ssf_pointers();
		int i;
		for (i=0; i<ssfs.size(); i++)
		{
			static QCPeak peaks[5000];
			BasicSpectrum bs;
			MZXML_single *ssf = (MZXML_single *)ssfs[i];

			bs.peaks = peaks;
			bs.ssf = ssf;
						
			ostringstream oss;
			oss << ssf->file_idx << " " << ssf->scan_number;
			ssf->single_name = oss.str();

			bs.num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
			
			if (ssf->scan_number<0)
			{
				cout << "Error: no scan number read from mzXML!!!" << endl;
				exit(1);
			}

			cout << "scan: " << ssf->scan_number << " " << bs.num_peaks << endl;

			bs.output_to_mgf(mgf_stream,config);
		//	bs.output_to_mgf(cout,&config);
		}
		//all_spec_fs.create_MGF_file(fm,config,"C:/Work/ClusterAnn/mzxml_spectra.mgf");
	//	extractMZFromFiles(model.get_config(),,"C:/Work/Data/Briggs/HEK293/H29340ul_mz.txt");
	

	//
		exit(0);
	}


	if (1) 
	{ 
		model.read_model("LTQ_LOW_TRYP");  
		config = model.get_config();
		config->apply_selected_PTMs("C+57 M+16");
		config->set_tolerances(0.5);
		config->set_pm_tolerance(2.5);
		config->set_digest_type(TRYPSIN_DIGEST);
	
		config->set_max_number_peaks_per_local_window(15);

	//	fm.init_from_list_file(config,"C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt");
	//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\LTQ_train_list.txt");
	//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\orbi_train.txt");
	//	edge_model.train_all_edge_models(fm,&model);
	//	tm.train_models(fm,&model); 

	//	fm.init_from_list_file(config,"C:\\Work\\clust_exp\\ShewMGF\\BM2000_ann_list.txt");
	//	make_frag_rank_histogram(fm,config);
	//	exit(0);

	//	benchmark_k_value(config,"C:\\Work\\clust_exp\\ShewMGF\\BM2000_ann_list.txt");

	//	make_benchmark_clustering_dataset(config, "C:\\Work\\clust_exp\\ShewMGF\\AnnsPlus_ann_list.txt",
	//			600, 750, false, "C:\\Work\\clust_exp\\ShewMGF\\", "BMNEW"); 

	//	exit(0);
		benchmark_clustering_performance(config,
			"C:\\Work\\clust_exp\\ShewMGF\\BM2000_ann_list.txt",15);

 
	//	print_dataset_spectra_by_stats(config,"C:\\Work\\clust_exp\\ann_mgf\\Sings_1.mgf");

	//	benchmark_top7_and_sim_thresh(config,"C:\\Work\\clust_exp\\tmp\\H293_40ul_list.txt",
	//		"C:\\Work\\clust_exp\\Results\\BM40ul\\BM40ul_anns.txt");

	//	benchmark_heuristic_filtering(config,"C:\\Work\\clust_exp\\tmp\\H293_40ul_list.txt");
	//	benchmark_retention_thresh(config,"C:\\Work\\clust_exp\\tmp\\H293_40ul_list.txt",
	//		"C:\\Work\\clust_exp\\Results\\BM40ul\\BM40ul_anns.txt");

		exit(0);

		FileManager fm;

	//	fm.init_from_mgf(config,"C:\\Work\\clust_exp\\ShewMGF\\OnlyAnn_1.mgf");


	//	make_specified_benchmark_clustering_dataset(config,"C:\\Work\\clust_exp\\ShewMGF\\both_list.txt",400,1000,
	//		"C:\\Work\\clust_exp\\ShewMGF\\","BM3000",3000,10,0);

		make_benchmark_clustering_dataset(config, "C:\\Work\\clust_exp\\ShewMGF\\AnnsPlus_ann_list.txt",
				800, 1200, true, "C:\\Work\\clust_exp\\ShewMGF\\", "AnnOnly"); 
		exit(0);

		ann_mgf_and_create_mgf_with_sim_masses(config,"K:\\Work\\Data\\Shewenella\\FT_anns.txt",
			"K:\\Work\\Data\\Shewenella\\FT_mgf_list.txt",
			"K:\\Work\\Data\\Shewenella\\FT_peptides.txt",
			"C:\\Work\\clust_exp\\ShewMGF\\",
			"AnnsPlus");

		exit(0);

		ann_mgf_and_create_mgf(config,"C:\\Work\\Data\\FT_mgf\\FT_single_anns.txt", 
			"C:\\Work\\Data\\FT_mgf\\FT_single_mgf.txt",
			"C:\\Work\\clust_exp\\ShewMGF\\",
			"Single",true);

		exit(0);

		ann_mgf_and_create_mgf(config,"C:\\Work\\Data\\FT_mgf\\FT_anns.txt", 
			"C:\\Work\\Data\\FT_mgf\\FT_mgf_list.txt",
			"C:\\Work\\clust_exp\\ShewMGF\\",
			"OnlyAnn",true);

		exit(0);

	//	create_16O_18O_dataset("C:\\Work\\msms5\\lists\\p19_list.txt",config);
	//	exit(0);

	//	config->set_need_to_estimate_pm(0);

		model.clone_charge_model(2,1);
		model.clone_charge_model(2,3);
		model.clone_charge_model(2,4);
		model.clone_charge_model(2,5);

	//	dataset_eval(&model,"C:\\Work\\msms5\\lists\\CAD_376.txt",0.05);
	//	dataset_eval(&model,"C:\\Work\\msms5\\lists\\ann_qtof_list.txt",0.1);
	//	dataset_eval(&model,"C:\\Work\\msms5\\lists\\list280_mgf.txt",0.6);

		

		vector<int>   set_sizes;  
		vector<float> probs;
		denovo_sequencing_and_aa_probs(&model,"C:\\Work\\msms5\\lists\\m280_list.txt",
			set_sizes,probs,2); 

//		denovo_sequencing_and_aa_probs(&model,"C:\\Work\\clust_exp\\LTQ_train2_ann_list.txt",
//			set_sizes,probs,2); 
//
//		output_denovo_results(&model,"C:\\Work\\msms5\\lists\\LTQ-FT_mgf_list.txt");

	//	denovo_sequencing_and_aa_probs(&model,"C:\\Work\\msms5\\lists\\LTQ-FT_mgf_list.txt",
	//		set_sizes,probs, 2);
		exit(0);

	//	print_specs(model.get_config(), "C:\\Work\\msms5\\lists\\one_mzxml.txt");
	//	check_m_over_z(&model,"C:\\Work\\msms5\\lists\\CoCl345sann_ann_list.txt");
	//	calc_parent_mass_tolerance_distribution(&model, "C:\\Work\\msms5\\lists\\ann_mgf_list.txt" , 0.6, 0.98);
	//	calc_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\ann_mgf_list.txt", 0.6, 0.95);

	//	perfrom_inital_evalutation("C:\\Work\\msms5\\lists\\ann_mgf_list.txt",0.5,2,0.05);

	//	denovo_sequencing_results(&model,"C:\\Work\\msms5\\lists\\CAD_376.txt" ,0.0075);
	//	denovo_sequencing_results(&model,"C:\\Work\\msms5\\lists\\ann_qtof_list.txt",0.1);
	//	denovo_sequencing_results(&model,"C:\\Work\\msms5\\lists\\ann_orbi_list.txt",0.008);

	//	perfrom_inital_evalutation("C:\\Work\\msms5\\lists\\ann_qtof_list.txt",0.2,2,0.05);
	//	perfrom_inital_evalutation("C:\\Work\\msms5\\lists\\ann_orbi_list.txt",0.025,2,0.05);
	//	calc_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\ann_qtof_list.txt",0.1,0.95);
	//	calc_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\ann_orbi_list.txt",0.1,0.95);
	//	calc_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\CAD_376.txt",0.1,0.95);
	//	calc_tolerance_distribution(&model,"C:\\Work\\Data\\Omics04\\omics_ann_list.txt",0.75,0.95);

	//	calc_parent_mass_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\ann_qtof_list.txt",0.1,0.975);
	//	calc_parent_mass_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\ann_orbi_list.txt",0.1,0.975);
	//	calc_parent_mass_tolerance_distribution(&model,"C:\\Work\\msms5\\lists\\CAD_376.txt",0.1,0.975);
		exit(0);

	//	fm.init_from_mgf(config,"C:\\Work\\msms4\\PepNovo\\test\\m280.mgf");
	//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\good_list2.txt");
	//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\p215.txt");
	//	fm.init_from_list_file(config,"C:\\Work\\Data\\Omics04\\omics_ann_list.txt");
	//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\omics_mgf_list.txt");
	//	fm.init_from_mgf(config,"C:\\Work\\msms5\\PepNovoHQ\\Omics04Spectra.mgf");

	//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\omics02_dta.txt");
	//	FileSet fs;
	//	fs.select_all_files(fm);
	//	fs.sort_according_to_m_over_z();
	//	fs.create_MGF_file(fm,config,"Omics02Spectra.mgf");


	//	exit(0);

	//	collect_denovo_statistics(fm,&model);
	//	denovo_histograms(fm,&model);
	//	config->set_tolerance(0.0075);
	//	random_check_homemorphic(config,50000,25);

	//	create_spectrum_clusters(config,"C:\\Work\\msms5\\lists\\Drosophila_list.txt",".","cc",0,5E6,0,1E6);
	//	create_spectrum_clusters(config,"C:\\Work\\msms5\\lists\\Dros_short.txt",".","cc",0,1E6,0,1E6);
//		create_spectrum_clusters(config,"C:\\Work\\msms5\\lists\\all_clust.txt","clust_out","ikkb",0,5E6,0,1E6);


	//	create_spectrum_clusters(config,"C:\\Work\\msms5\\lists\\omics_mgf_list.txt","clust_out",
	//		"Omics04b",0,5E6,0,1E6);

	//	create_spectrum_clusters(config,"C:\\Work\\msms5\\lists\\omics02_mgf_list.txt","clust_out",
	//		"Omics02",0,5E6,0,1E6);
		

	//	create_spectrum_clusters(config,"C:\\Work\\msms5\\PepNovoHQ\\clust_out2\\h293_dat_list.txt",
	//		"clust_out2","H293b_2nd_digest_abd3",0,5E6,1938.76,1E6,2);

	//	create_spectrum_clusters(config,"C:\\Work\\msms5\\PepNovoHQ\\clust_out\\h29s_list.txt","clust_out",
	//		"xxxx",0,5E6,835.397,1E6,2);

		exit(0);



		DAT_Converter dat;
		dat.init_DAT_Converter(2000,25,1048576);

		exit(0);
	}


	
//	config->add_selected_PTMs("C+57 M+16 S+80 T+80 Y+80 N+1 Q+1 K+42 D+16 K+16 P+16 N+16");
//	config->set_tolerances(0.0075);
//	config->set_pm_tolerance(0.011);




	
//	fdb.create_db_from_fasta("C:\\Work\\msms5\\PepNovoHQ\\DB\\contaminants.fasta",config);
//  fdb.create_db_from_fasta("C:\\Work\\msms5\\PepNovoHQ\\DB\\Homo_sapiens.NCBI35.dec.pep.fa",config);
//	fdb.create_db_from_fasta("C:\\Work\\msms5\\PepNovoHQ\\DB\\fa50mb.fa",config,true,5,6);
//	fdb.create_db_from_fasta("C:\\Work\\msms5\\PepNovoHQ\\DB\\homo_pos.fa",config);
//	fdb.print_protein_names();
//	fdb.write_FastaDB("C:\\Work\\msms5\\PepNovoHQ\\DB\\h**o.dat");
//	fdb.read_FastaDB("C:\\Work\\msms5\\PepNovoHQ\\DB\\h**o.dat",config);
//	fdb.read_FastaDB("C:\\Work\\msms5\\PepNovoHQ\\DB\\qqq.dat",config);
//	fdb.read_FastaDB("C:\\Work\\msms5\\PepNovoHQ\\DB\\fa500k.dat",config);
//	fdb.print_protein_names();
//	fdb.write_FastaDB("C:\\Work\\msms5\\PepNovoHQ\\DB\\fa5--0mb.dat");

//  exit(0);

//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\CAD_seq_list.txt");
//	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\CAD_u_list.txt");
	fm.init_from_list_file(config,"C:\\Work\\msms5\\lists\\CAD_376.txt");

	

//	make_bin_histograms(fm,config);
//	calc_avg_rand(fm,config);
//	explore_fragment_set(fm,config);
//	show_occurences(fm,config,"p-25.0");
//	find_internal_offset_counts(fm,config);
//	make_frag_rank_histogram(fm,config);
//	exit(0);


//	internal_fragment_annotation(fm,model.get_config());
//	find_internal_offset_counts(fm,model.get_config());
//	exit(0);

	FileSet fs;
	fs.select_all_files(fm);
	fstream ofs("true_seq.txt",ios::out);
	fs.make_fasta_from_file_seqs(fm,config,45,ofs);

//	dbsm.train_regression_models(fm,fdb,12,&model);

//	analyze_cad_spec(fm,config);
//	make_rank_histograms(fm,&model);
//	dbsm.read_model("DBS.txt");
//	CAD_histograms(fm,&model,fdb,&dbsm);
//	rand_db_stats(fm,&model,&dbsm);
//	db_search_stats(fm,&model,&dbsm);
//	neg_db_search_stats(fm,&model,&dbsm);
//	CAD_edge_stats(fm,&model);
//	CAD_denovo_histograms(fm,&model,fdb);
//	CAD_edge_stats(fm,&model);
//	collect_denovo_statistics(fm,&model);
	exit(0);

//	int *arr;
//	int t_size;
//	read_fasta("cshort.fasta",&arr,&t_size,&config);
//	read_fasta("C:\\Work\\msms4\\PepNovo\\Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,config);
	read_fasta("Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,&config);
	read_fasta("Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,&config);
	read_fasta("Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,&config);
	read_fasta("Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,&config);
	read_fasta("Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,&config);
	read_fasta("Homo_sapiens.NCBI35.dec.pep.fa",&arr,&t_size,&config); 
//	homeomorphic_levels(&config,arr,t_size,1000.0,1001,"hp_res.txt");

	config->set_tolerance(0.0075);
//	full_exp(config,0,arr,t_size,"hp_res_dis_a0.txt");
//	full_exp(config,1,arr,t_size,"hp_res_dis_a1.txt");
//	full_exp(config,2,arr,t_size,"hp_res_dis_a2.txt");
//	full_exp(config,3,arr,t_size,"hp_res_dis_a3.txt");
//	full_exp(config,4,arr,t_size,"hp_res_dis_a4.txt");
	

//	homeomorphic_exp3(&config,100);
//	exit(0);
//	config.print_supported_PTMs();

//	config.print_session_aas();
	string p;
//	ifstream fs("D:\\msms4\\PepNovo\\test\\C25A19_IP_01.mgf",ios::in);
//	ifstream fs("D:\\msms4\\PepNovo\\test\\m280.mgf",ios::in);
//	fm.init_from_mgf(&config,"D:\\msms4\\PepNovo\\test\\m280.mgf");
//	fm.init_from_mgf(&config,"D:\\msms4\\PepNovo\\mgf_2600.2.mgf");
//	fm.init_from_list_file(&config,"D:\\msms4\\lists\\unique_good2.txt");
	
//	fm.init_from_list_file(&config,"D:\\msms4\\lists\\short2.txt");
//	fm.init_from_list_file(&config,"D:\\Data2\\ikkb_unmod_list.txt");

//	rand_seed(1111);
	rand_seed(1426347);


//	model.read_model("ESI_RANKS");
//	model.get_config()->add_selected_PTMs("C+57");

//	SpectrumScore sqs;
//	sqs.learn_score_params(&model,"D:\\msms4\\lists\\sqs2_short.txt",
//		"D:\\msms4\\lists\\sqs_neg1.txt");	
//	exit(0);


//	model.set_model_name(string("ESI2"));


//	random_peak_match_exp(model.get_config(),fm,800,1200,10000000);
//	model.print_joint_scores();

//	me_reg_exp();
//	me_exp();
//	exit(0);





//	fm.init_from_mgf(model.get_config(),"c:\\work\\msms4\\PepNovo\\test\\m280.mgf");
//	fm.init_from_mgf(model.get_config(),"c:\\work\\msms4\\PepNovo\\hpep.mgf");
	fm.init_from_list_file(config,"c:\\Work\\msms4\\lists\\efast2.txt");
//	fm.init_from_list_file(&config,"D:\\msms4\\lists\\charge2.txt");
//m.init_from_list_file(model.get_config(),"C:\\Work\\msms4\\PepNovo\\lll.txt");
//m.init_from_list_file(model.get_config(),"D:\\msms4\\lists\\l1.txt");
	


*/


	return 0;
}
Пример #16
0
// Adds this library file to `fileset` by passing itself to FileSet::add_library.
void LibraryFile::add_to_fileset(FileSet& fileset)
{
    fileset.add_library(this);
}
Пример #17
0
// Trains the SQS regression models and writes them to
// "<resource dir>/<model name>_SQS.txt".
//
// Samples are collected per charge and per size bin (bins are delimited by
// sqsMassThresholds_): charge 0 holds the spectra listed in neg_list (label 1),
// charges 1..maximalChargeWithModels_ hold spectra selected from fm_pos (label 0).
// Two families of models are then trained:
//   sqs_models[charge][0][size]        - charge vs. the charge-0 samples
//   sqs_models[charge1][charge2][size] - charge1 vs. charge2, for charge1 > charge2
// A slot stays NULL when its data set does not pass the class-weight check.
//
// config         - passed to the list loader and spectrum reader; also supplies the
//                  fallback size thresholds, the resource directory and the model name.
// fm_pos         - source of the spectra used for charges >= 1.
// neg_list       - list file loaded into a local FileManager for the charge-0 samples.
// specificCharge - when > 0, spectra are collected only for this charge (and charge 0).
// inputWeights   - optional weights, indexed [charge][sizeIndex]; when NULL every
//                  weight is 1.0 and the maximal charge with models is 3.
void PMCSQS_Scorer::train_sqs_models(Config *config, 
									 const FileManager& fm_pos, 
									 const char *neg_list,
									 int specificCharge, 
									 vector<vector<float> > *inputWeights)
{
	vector< vector< vector<ME_Regression_Sample> > > samples; //  neg, p1, p2, p3 / sizeIndex
	FileManager fm_neg;

	maximalChargeWithModels_ = (inputWeights ? inputWeights->size()-1 : 3);
	int charge;

	set_frag_pair_sum_offset(MASS_PROTON); // b+y - PM+19
	set_bin_increment(0.1);
	this->set_sqs_mass_thresholds();

	if (this->pmcMassThresholds_.size() == 0)
	{
		pmcMassThresholds_=config->get_size_thresholds();
	}

	// There is one more size bin than there are thresholds.
	const int numSizes = this->sqsMassThresholds_.size();

	vector<vector<float> > classWeights;
	if (inputWeights)
	{
		classWeights = *inputWeights;
	}
	else
	{
		classWeights.resize(maximalChargeWithModels_+1);
		int i;
		for (i=0; i<classWeights.size(); i++)
			classWeights[i].resize(maximalChargeWithModels_+1,1.0);
	}

	// The weights are read below as classWeights[charge][sizeIndex] with sizeIndex in
	// [0,numSizes]. Pad rows that are too short with the neutral weight 1.0 so that every
	// size index is addressable (existing entries are left untouched).
	{
		int i;
		for (i=0; i<classWeights.size(); i++)
			if ((int)classWeights[i].size() < numSizes+1)
				classWeights[i].resize(numSizes+1,1.0);
	}

	cout << "NUM SIZE MODELS: " << numSizes+1 << endl;

	samples.resize(maximalChargeWithModels_+1);

	// Size the per-size table of every charge up front. Charges that are skipped below
	// because of specificCharge are still indexed by sizeIndex in the training and report
	// loops; their sample lists simply stay empty.
	for (charge=0; charge<=maximalChargeWithModels_; charge++)
		samples[charge].resize(numSizes+1);

	fm_neg.init_from_list_file(config, neg_list);
	const int max_to_read_per_file = 8000;

	for (charge=0; charge<=maximalChargeWithModels_; charge++)
	{
		// charge 0 (the negative set) is always collected
		if (charge>0 && specificCharge>0 && charge != specificCharge)
			continue; 

		int sizeIndex;
		for (sizeIndex=0; sizeIndex<=numSizes; sizeIndex++)
		{	
			const mass_t minMass = (sizeIndex == 0 ? 0 : sqsMassThresholds_[sizeIndex-1]);
			const mass_t maxMass = (sizeIndex == numSizes ? POS_INF : sqsMassThresholds_[sizeIndex]);

			BasicSpecReader bsr;
			// NOTE(review): fixed-size peak buffer; assumes read_basic_spec never writes
			// more than 5000 peaks - confirm against the reader.
			QCPeak peaks[5000]; 

			FileSet fs;
			if (charge == 0)
			{
				fs.select_files_in_mz_range(fm_neg,minMass, maxMass,0);	
			}
			else
			{
				fs.select_files_in_mz_range(fm_pos, minMass, maxMass, charge);
			}

			cout << "Found " << fs.get_total_spectra() << " for charge " << charge << " ranges:" <<
				minMass << " - " << maxMass << endl;

			fs.randomly_reduce_ssfs(max_to_read_per_file);
			const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();
			const int label = (charge == 0 ? 1 : 0); // negatives are class 1, charged spectra class 0
			const int num_samples =  all_ssf.size();
						
			samples[charge][sizeIndex].resize(num_samples);

			
			int i;
			for (i=0; i<num_samples; i++)
			{
				SingleSpectrumFile* ssf = all_ssf[i];
				BasicSpectrum bs;

				bs.peaks = peaks;
				bs.ssf = ssf;
			
				if (charge==0)
				{
					bs.num_peaks = bsr.read_basic_spec(config,fm_neg,ssf,peaks);
					bs.ssf->charge=0;
				}
				else
					bs.num_peaks = bsr.read_basic_spec(config,fm_pos,ssf,peaks);

				init_for_current_spec(config,bs);
				calculate_curr_spec_pmc_values(bs, bin_increment);
			
				fill_fval_vector_with_SQS(bs, samples[charge][sizeIndex][i]);
				
				samples[charge][sizeIndex][i].label = label;
			}
		}
	}

	// cout sample composition
	cout << "Sample composition:" << endl;
	for (charge=0; charge<=maximalChargeWithModels_; charge++)
	{
		cout << charge;
		int i;
		for (i=0; i<samples[charge].size(); i++)
			cout << "\t" << samples[charge][i].size();
		cout << endl;
	}

	// create SQS models
	this->sqs_models.resize(maximalChargeWithModels_+1);
	for (charge =0; charge<=maximalChargeWithModels_; charge++)
	{
		sqs_models[charge].resize(maximalChargeWithModels_+1);
		int j;
		for (j=0; j<sqs_models[charge].size(); j++)
			sqs_models[charge][j].resize(numSizes+1,NULL);
	}



	////////////////////////////////////////////
	// train model of each charge vs. the negative (charge 0) samples
	for (charge=1; charge<=maximalChargeWithModels_; charge++)
	{
		int sizeIndex;
		for (sizeIndex=0; sizeIndex<=numSizes; sizeIndex++)
		{
			ME_Regression_DataSet ds;

			cout << endl << "CHARGE " << charge << " SIZE " << sizeIndex << endl;
			ds.num_classes=2;
			ds.num_features=SQS_NUM_FIELDS;
			ds.add_samples(samples[0][sizeIndex]);
			ds.add_samples(samples[charge][sizeIndex]);
			ds.tally_samples();

			// Skip the model when either class has (almost) no weight; this is presumably
			// also what rejects charges that were not collected because of specificCharge.
			if (ds.class_weights[0]<0.0001 || ds.class_weights[1]<0.0001)
			{
				cout << "Warning: insufficient number of samples, not trianing model for this charge " << charge <<
					" size " << sizeIndex << endl;
				continue;
			}

			const double pos_weight = 0.2 + classWeights[charge][sizeIndex]*0.3;

			ds.randomly_remove_samples_with_activated_feature(1,SQS_IND_MAX_TAG_LENGTH_ABOVE_4,0.5);

			ds.calibrate_class_weights(pos_weight); // charge vs bad spectra
			ds.print_feature_summary(cout,SQS_var_names);

			sqs_models[charge][0][sizeIndex]=new ME_Regression_Model;

			sqs_models[charge][0][sizeIndex]->train_cg(ds,250);

			sqs_models[charge][0][sizeIndex]->print_ds_probs(ds);

		}
	}

		
	////////////////////////////////////////////
	// train model vs. model if charge1>charge2
	if (1)
	{
		int charge1,charge2;
		for (charge1=2; charge1<=maximalChargeWithModels_; charge1++)
		{
			for (charge2=1; charge2<charge1; charge2++)
			{
				int sizeIndex;
				for (sizeIndex=0; sizeIndex<=numSizes; sizeIndex++)
				{
					ME_Regression_DataSet ds;

					ds.num_classes=2;
					ds.num_features=SQS_NUM_FIELDS;

					ds.add_samples(samples[charge1][sizeIndex]);

					// The charge2 samples are added as class 1 for this data set only;
					// their stored label is restored to 0 right after they are added.
					int i;
					for (i=0; i<samples[charge2][sizeIndex].size(); i++)
					{
						samples[charge2][sizeIndex][i].label=1;
						ds.add_sample(samples[charge2][sizeIndex][i]);
						samples[charge2][sizeIndex][i].label=0;
					}

					float relative_weight = classWeights[charge1][sizeIndex]/
						(classWeights[charge1][sizeIndex]+classWeights[charge2][sizeIndex]);

					ds.tally_samples();

					if (ds.class_weights[0]<0.0001 || ds.class_weights[1]<0.0001)
					{
						cout << "Warning: insufficient number of samples, not trianing model for charge " << charge1 <<
							" vs charge " << charge2<< " (size " << sizeIndex << ")" << endl;
						continue;
					}

					ds.calibrate_class_weights(relative_weight);

					sqs_models[charge1][charge2][sizeIndex] = new ME_Regression_Model;

					cout << endl << "CHARGE " << charge1 << " vs " << charge2 << "  size " << sizeIndex << endl;
					cout << "Relative weights: " << charge1 << "/(" << charge1 << "+" <<
						charge2 << "): " << relative_weight << endl;
				
					ds.print_feature_summary(cout,SQS_var_names);

					sqs_models[charge1][charge2][sizeIndex]->train_cg(ds,300);
					sqs_models[charge1][charge2][sizeIndex]->print_ds_probs(ds);
				}
			}
		}
	}

	init_sqs_correct_factors(maximalChargeWithModels_, sqsMassThresholds_.size());

	////////////////////////////////////////////
	// final report on datasets
	// For every size bin and every sample set d, print the fraction of samples whose
	// most probable charge model (among those above p_thresh) is each charge; column 0
	// counts the samples for which no model exceeded the threshold.
	cout << endl;

	int sizeIndex;
	for (sizeIndex=0; sizeIndex<=numSizes; sizeIndex++)
	{
		cout << endl << "SIZE: " << sizeIndex << endl;
		cout << "--------" << endl;
		float p_thresh = 0.05;
		int d;
		for (d=0; d<=maximalChargeWithModels_; d++)
		{
			vector<int> counts;
			vector<int> max_counts;
			counts.resize(maximalChargeWithModels_+1,0);
			max_counts.resize(maximalChargeWithModels_+1,0);

			int i;
			for (i=0; i<samples[d][sizeIndex].size(); i++)
			{
				bool above_thresh=false;
				float max_prob=0;
				int   max_class=0;
				int c;
				for (c=1; c<=maximalChargeWithModels_; c++)
				{
					// models that were not trained are left NULL
					if (! sqs_models[c][0][sizeIndex])
						continue;

					float prob = sqs_models[c][0][sizeIndex]->p_y_given_x(0,samples[d][sizeIndex][i]);
					if (prob>p_thresh)
					{
						counts[c]++;
						above_thresh=true;
						if (prob>max_prob)
						{
							max_prob=prob;
							max_class=c;
						}
					}
				}
				max_counts[max_class]++;

				if (! above_thresh)
					counts[0]++;
			}

			// an empty sample set yields 0/0 here and is printed as a non-number
			cout << d << "\t";
			for (i=0; i<=maximalChargeWithModels_; i++)
				cout << fixed << setprecision(4) << max_counts[i]/(float)samples[d][sizeIndex].size() << "\t";
			cout << endl;
		}
	}



	ind_initialized_sqs = true;

	string path;
	path = config->get_resource_dir() + "/" + config->get_model_name() + "_SQS.txt";
	write_sqs_models(path.c_str());
}
Пример #18
0
/*
 * Machine constructor.
 *
 * files: supplies the plugboard file, the reflector file, the rotor files and
 *        an optional start-position file (read through the FileSet getters).
 *
 * Builds the plugboard, the rotors and the reflector in that order, then applies
 * the rotor start positions. On the first failure this->error is set, the
 * components built so far are deleted and the constructor returns early.
 *
 * NOTE(review): this excerpt ends before the constructor's closing brace; the
 * remainder of the definition is not visible here.
 * NOTE(review): the error paths delete members without resetting the pointers
 * (only one path clears mRotor) - confirm the destructor does not delete them again.
 */
Machine::Machine(FileSet files){
    this->error = 0;
    this->mPlugBoard = new PlugBoard(files.getPlugboardfile());
    if(mPlugBoard->getError() != 0){
        this->error = mPlugBoard->getError();
        delete this->mPlugBoard;
        return;
    }
    // reserve one NULL slot per rotor file; the slots are filled in the next loop
    for(int i=0; i < (int)files.getRotorsfile().size(); i++){
        this->mRotor.push_back(NULL);
    }

    for(int i=0; i < (int)files.getRotorsfile().size(); i++){
        int sizeR = files.getRotorsfile().size() - 1 - i;
        //the first file will go last, and the last file will go first(NO.0 rotor)
        mRotor[i] = (new Rotor(files.getRotorsfile()[sizeR], i)); //make a rotor
        if(mRotor[i]->getError() != 0){
            this->error = mRotor[i]->getError();
            delete this->mPlugBoard;
            // only rotors 0..i have been created so far; later slots are still NULL
            for(int j=0; j <= i; j++){
                delete this->mRotor[j]; // delete the vector space one by one.
            }
            return;
        }
        // both branches below make the same call, so every rotor after the first
        // is connected to its predecessor
        if(i > 0 && i < (int)files.getRotorsfile().size() -1){
            mRotor[i-1]->connect(mRotor[i]);  //function to assemble rotors
        }
        else if (i>0 && i == (int)files.getRotorsfile().size() -1){
            mRotor[i-1]->connect(mRotor[i]);
        }
    }
    this->mReflector = new Reflector(files.getReflectorfile());
    if(mReflector->getError() != 0){
        this->error = mReflector->getError();
        delete this->mPlugBoard;
        delete this->mReflector;

        for(int i=0; i< (int)this->mRotor.size(); i++){
            delete this->mRotor[i]; // delete the vector space one by one.
        }
        this->mRotor.clear();
        return;
    }

    //set start positions of rotors by rotating them
    if(files.getRotorsfile().size() != 0){
        if(files.getStartpositionfile()){
            fstream startPos;
            startPos.open(files.getStartpositionfile());
            if(startPos.fail()){
                errorChecked(ERROR_OPENING_CONFIGURATION_FILE);
                this->error = ERROR_OPENING_CONFIGURATION_FILE;
                delete this->mPlugBoard;
                delete this->mReflector;
                for(int i=0; i< (int)this->mRotor.size(); i++){
                    delete this->mRotor[i]; // delete the vector space one by one.
                }
                return;
            }
            // first pass: reject the file if it contains anything but digits
            // (48..57 are the ASCII codes of '0'..'9'). Formatted extraction into a
            // char skips whitespace by default, so the ' ' test is presumably redundant.
            char testC;
            while(startPos >> testC){
                if(testC != ' ' && ((int)testC > 57 || (int)testC < 48)){
                    errorChecked(NON_NUMERIC_CHARACTER);
                    this->error = NON_NUMERIC_CHARACTER;
                    delete this->mPlugBoard;
                    delete this->mReflector;
                    for(int i=0; i< (int)this->mRotor.size(); i++){
                        delete this->mRotor[i]; // delete the vector space one by one.
                    }
                    return;
                }
            }

            // the first pass ran the stream to end-of-file: clear the state flags
            // and rewind before reading the positions as integers
            startPos.clear();
            startPos.seekg(0, startPos.beg); // reuse the file pointer.

            // second pass: positions are assigned from the last rotor index down to 0,
            // stopping when the rotors run out or the stream reports end-of-file
            for (int i=(int)this->mRotor.size()-1; i>-1 &&!startPos.eof() ; i--){
                int startPosition = -1;

                startPos >> startPosition;
                // an non-integer input causing a failed bit
                if(startPos.fail()){
                    errorChecked(NO_ROTOR_STARTING_POSITION);
                    this->error = NO_ROTOR_STARTING_POSITION;
                    delete this->mPlugBoard;
                    delete this->mReflector;
                    // this inner i shadows the rotor index of the enclosing loop
                    for(int i=0; i< (int)this->mRotor.size(); i++){
                        delete this->mRotor[i]; // delete the vector space one by one.
                    }
                    return;
                }
                // start position out of range
                if (startPosition > 25 || startPosition < 0){
                    errorChecked(INVALID_INDEX);
                    this->error = INVALID_INDEX;
                    delete this->mPlugBoard;
                    delete this->mReflector;
                    for(int i=0; i< (int)this->mRotor.size(); i++){
                        delete this->mRotor[i]; // delete the vector space one by one.
                    }
                    return;
                }
                // record the position and rotate the rotor that many steps
                this->mRotor[i]->startPosition = startPosition;
                for(int j=0; j< mRotor[i]->startPosition; j++){
                    mRotor[i]->rotate();
                }
            }
        }
    }
Пример #19
0
void AdvancedScoreModel::learn_prm_normalizer_values(const FileManager& fm)
{
	const float step = 0.5;
	const float min_delta = -1.0;
	const float max_delta = 7.0;
	const float target_mid_ratio = 0.96;
	const float target_side_ratio = 0.94;


	config.set_use_spectrum_charge(1);

	regional_prm_normalizers.resize(regional_breakage_score_models.size());
	int c;
	for (c=0; c<regional_breakage_score_models.size(); c++)
	{
		regional_prm_normalizers[c].resize(regional_breakage_score_models[c].size());
		int s;
		for (s=0; s<regional_breakage_score_models[c].size(); s++)
			regional_prm_normalizers[c][s].resize(regional_breakage_score_models[c][s].size(),0);
	}
	

	const vector< vector<mass_t> >& mass_threshes = config.get_size_thresholds();
	for (c=1; c<regional_prm_normalizers.size(); c++)
	{
		int s;
		for (s=0; s<regional_prm_normalizers[c].size(); s++)
		{
			const mass_t min_mass = (s == 0 ? 0 : mass_threshes[c][s-1]);
			const mass_t max_mass =  mass_threshes[c][s];
			const int num_regions = regional_prm_normalizers[c][s].size();
			
			cout << "Finding normalizers for charge " << c << " size " << s << "  (masses " << min_mass << " - " <<
				max_mass << ")" << endl;

			FileSet fs;
			BasicSpecReader bsr;

			fs.select_files_in_mz_range(fm,min_mass/c,max_mass/c,c);
			fs.randomly_reduce_ssfs(2000);

			vector< vector< NodeType > > all_prms;
			const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();

			if (fs.get_total_spectra()<50)
			{
				cout << "Insufficient number of spectra... skipping" << endl;
				continue;
			}

			int sc;
			for (sc=0; sc<all_ssf.size(); sc++)
			{
				PrmGraph prm;
				static vector<QCPeak> peaks;
				SingleSpectrumFile *ssf = all_ssf[sc];
				if (peaks.size()<ssf->num_peaks)
				{
					int new_size = ssf->num_peaks*2;
					if (new_size<2500)
						new_size=2500;
					peaks.resize(new_size); 
				}

				const int num_peaks = bsr.read_basic_spec(&config,fm,ssf,&peaks[0]);	
				if (num_peaks<5)
					continue;

				// convert peak list ot a spectrum with charge (if original charge ==0)
				// the spectrum gets charge 2, but the true charge is computed from the data
			
				Spectrum s;
				s.init_from_QCPeaks(&config,&peaks[0],num_peaks,ssf);

				vector<mass_t> pms_with_19;
				vector<int>    charges;

				pms_with_19.clear();
				charges.clear();		
				
				BasicSpectrum bs;
				bs.ssf = ssf;
				bs.peaks = &peaks[0];
				bs.num_peaks = num_peaks;

				// output m/z and prob values for the different charge states
				
				select_pms_and_charges(&config,bs,pms_with_19,charges);
				if (pms_with_19.size()<=0)
					continue;
			
				s.set_charge(charges[0]);
				init_model_for_scoring_spectrum(&s);
				prm.create_graph_from_spectrum(this,&s,pms_with_19[0]);

				vector<NodeType> spec_prms;
				vector<mass_t>   exp_masses;
				const mass_t true_mass_with_19 = s.get_true_mass_with_19();
				s.get_peptide().calc_expected_breakage_masses(&config,exp_masses);

				int i;
				for (i=1; i<prm.get_num_nodes()-1; i++)
				{
					const Node& node = prm.get_node(i);
					if (node.score == 0)
						continue;
					
					NodeType nt;

					nt.type = 0;
					int j;
					for (j=0; j<exp_masses.size(); j++)
						if (fabs(exp_masses[j]-node.mass)<config.get_tolerance())
						{
							nt.type=1;
							break;
						}
					
					if (nt.type<=0)
					{
						int j;
						for (j=0; j<exp_masses.size(); j++)
							if (fabs(true_mass_with_19 - exp_masses[j] -node.mass-MASS_PROTON)<config.get_tolerance())
							{
								nt.type=2;
								break;
							}
					}

					nt.org_score = node.score;
					nt.mod_score = node.score;
					nt.region = node.breakage.region_idx;
					spec_prms.push_back(nt);
				}
				all_prms.push_back(spec_prms);
			}
		
	
			vector< vector< double > > per_pre, per_suf, per_covered;
			vector<float> deltas;

			per_pre.resize(num_regions);
			per_suf.resize(num_regions);
			per_covered.resize(num_regions);

			float delta;
			for (delta = min_delta; delta<=max_delta; delta+=step )
			{
				// perform mods
				int a;
				for (a=0; a<all_prms.size(); a++)
				{
					int b;
					for (b=0; b<all_prms[a].size(); b++)
					{
						NodeType& nt = all_prms[a][b];
						if (nt.org_score< -delta)
						{
							nt.mod_score = NEG_INF;
							continue;
						}
						
					/*	if (nt.org_score>delta)
						{
							nt.mod_score = nt.org_score ;
						}
						else
							nt.mod_score = nt.org_score + (delta-nt.org_score)*0.5;*/
						nt.mod_score = nt.org_score + delta;
					}
				}

				// compute stats (if score is negative treat as 0)
				vector<double> num_pre,num_suf;
				vector<double> num_pre_wpos, num_suf_wpos;
				vector<double> score_pre, score_suf, total_score;
			

				num_pre.resize(num_regions,0);
				num_suf.resize(num_regions,0);
				num_pre_wpos.resize(num_regions,0);
				num_suf_wpos.resize(num_regions,0);
				score_pre.resize(num_regions,0);
				score_suf.resize(num_regions,0);
				total_score.resize(num_regions,0);
				
				for (a=0; a<all_prms.size(); a++)
				{
					int b;
					for (b=0; b<all_prms[a].size(); b++)
					{
						const int   type =    all_prms[a][b].type;
						const float score =   all_prms[a][b].mod_score;
						const int   region =  all_prms[a][b].region;

						if (type == 1)
						{
							num_pre[region]++;
							if (score>0)
							{
								num_pre_wpos[region]++;
								score_pre[region]+= score;
							}
						}

						if (type == 2)
						{
							num_suf[region]++;
							if (score>0)
							{
								num_suf_wpos[region]++;
								score_suf[region]+=score;
							}
						}

						if (score>0)
							total_score[region]+=score;
					}
				}

				
				deltas.push_back(delta);
				int r;
				for (r=0; r<num_regions; r++)
				{
					per_pre[r].push_back(num_pre_wpos[r]/num_pre[r]);
					per_suf[r].push_back(num_suf_wpos[r]/num_suf[r]);
					per_covered[r].push_back((score_pre[r]+score_suf[r])/total_score[r]);
				}
			}

			// report
			int r;
			for (r=0; r<num_regions; r++)
			{
				cout << endl << "Region " << r << endl;
				int d;
				for (d=0; d<deltas.size(); d++)
					cout << "\t" << deltas[d];
				cout << endl << "% Pre";
				for (d=0; d<per_pre[r].size(); d++)
					cout << "\t" << per_pre[r][d];
				cout << endl << "% Suf";
				for (d=0; d<per_suf[r].size(); d++)
					cout << "\t" << per_suf[r][d];
				cout << endl << "% Cov";
				for (d=0; d<per_covered[r].size(); d++)
					cout << "\t" << per_covered[r][d];
				cout << endl;

				// select
				float target_val = target_mid_ratio;
				if (r==0 || r == num_regions-1)
					target_val = target_side_ratio;

				float best_val=POS_INF;
				float best_delta=0;

				for (d=0; d<deltas.size(); d++)
					if (fabs(per_pre[r][d]-target_val)<best_val)
					{
						best_val = fabs(per_pre[r][d]-target_val);
						best_delta = deltas[d];
					}
				
				cout << "Chose delta = " << best_delta << endl << endl;
				regional_prm_normalizers[c][s][r]=best_delta;
			}	
		}
	}
}
Пример #20
0
// Collects into outFiles the files that a "find all" operation should search.
//
// inMethod selects where the files come from:
//   eMMDirectory   - the files produced by an MFileIterator over inDirectory; the
//                    iterator is deep when inRecursive is set, restricted to text files
//                    when inTextFilesOnly is set, and filtered by inFileNameFilter when
//                    that string is not empty.
//   eMMOpenWindows - the path of every document in the MDocument list whose file exists.
//   eMMIncludes    - the eMMDirectory result for each distinct include path of the
//                    current project (nothing is added when there is no project).
// Any other method value adds nothing. Files are inserted into outFiles; entries that
// are already present are not removed.
void MFindDialog::GetFilesForFindAll(
	MMultiMethod	inMethod,
	const fs::path&	inDirectory,
	bool			inRecursive,
	bool			inTextFilesOnly,
	const string&	inFileNameFilter,
	FileSet&		outFiles)
{
	SetStatusString(inDirectory.string());
	
	switch (inMethod)
	{
		case eMMDirectory:
		{
			uint32 flags = 0;
			
			if (inRecursive)
				flags |= kFileIter_Deep;
			
			if (inTextFilesOnly)
				flags |= kFileIter_TEXTFilesOnly;
			
			MFileIterator iter(inDirectory, flags);
		
			// c_str() never returns a null pointer, so testing it was always true;
			// install a filter only when one was actually supplied.
			if (not inFileNameFilter.empty())
				iter.SetFilter(inFileNameFilter);
		
			fs::path file;
			while (iter.Next(file))
				outFiles.insert(file);
			break;
		}
		
		case eMMOpenWindows:
		{
			MDocument* doc = MDocument::GetFirstDocument();
			while (doc != nil)
			{
				// documents whose path does not exist on disk are left out
				fs::path file = doc->GetURL().GetPath();
				if (exists(file))
					outFiles.insert(file);
				doc = doc->GetNextDocument();
			}
			break;
		}
		
		case eMMIncludes:
		{
			MProject* project = MProject::Instance();
			if (project != nil)
			{
				vector<fs::path> includePaths;

				project->GetIncludePaths(includePaths);

				// sort + unique so that each include directory is scanned only once
				sort(includePaths.begin(), includePaths.end());
				includePaths.erase(unique(includePaths.begin(), includePaths.end()), includePaths.end());

				for (vector<fs::path>::iterator p = includePaths.begin(); p != includePaths.end(); ++p)
					GetFilesForFindAll(eMMDirectory, *p, inRecursive, inTextFilesOnly, inFileNameFilter, outFiles);
			}
			break;
		}
	}
}
Пример #21
0
 // Returns true when the `files` member reports that it is empty.
 gcc_pure
 bool IsEmpty() const {
   return files.empty();
 }
Пример #22
0
void GPFSConfigHandler::task()
{
    int nFSs   = 0;
    int nPools = 0;
    int nDisks = 0;
    int nFsets = 0;
    int nNodes = 0;

    TEAL_ERR_T ret;
    string msg;
    char tmp[10];
    string fsName;
    string stgName;
    string diskName;
    string fsetName;
    string nodeName;
    string clusterName;

    FilesystemInfo* fsInfo       = NULL;
    StoragePoolInfo* stgInfo     = NULL;
    DiskInfo* diskInfo           = NULL;
    FileSet* fsetInfo            = NULL;
    FileSet* fileSetList         = NULL;

    MErrno err = M_OK;
    log_info("########################Start refreshing all entities#########################################");    
    err = GPFSHandler::getPollHandler()->getDaemonState();
    if(err != M_OK)
    {
        msg = "daemon is down on local node ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }

    err = GPFSHandler::getPollHandler()->refreshClusterRecipe();
    if(err != M_OK)
    {
        msg = "refresh cluster failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }
    ClusterInfo* clusterInfo = new ClusterInfo(&err);
    //update cluster info
    err = GPFSHandler::getPollHandler()->updateClusterInfo(clusterInfo);
    if(err != M_OK)
    {
        msg = "update cluster info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    } 
    
    //update all nodes info
    err = GPFSHandler::getPollHandler()->updateNodeInfo(clusterInfo);
    if(err != M_OK)
    {
        msg = "update node failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }        
    err = GPFSHandler::getPollHandler()->getClusterInfo(clusterInfo); //this maybe not needed
    if(err != M_OK)
    {
        msg = "get cluster info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }     
    err = GPFSHandler::getPollHandler()->updateDiskSDRInfo();
    if(err != M_OK)
    {   /*TODO: This API invokes "mmsdrquery 30 3001:3004:3005:3006:3007:3008:3002:3003" under the cover. Need to check if it is a real error or an expected configuration to determin whether to ignore it or not.*/
        msg = "update disk SDR info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        msg += ", ignore it...";
        log_warn(msg);
       // return; // simply ignore it since there a configuration of two clusters and NSD may not be seen from the FS cluster.
    }
    err = GPFSHandler::getPollHandler()->updateFilesystemInfo(clusterInfo, 1);// to get perfermance statics even if not used.
    if(err != M_OK)
    {
        msg = "update file system failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }    

    err = GPFSHandler::getPollHandler()->updateMountedNodeInfo(clusterInfo); // to get mounted node info
    if(err != M_OK)
    {   /*TODO: This API invokes "mmlsmount all_local -Y" under the cover. Need to check if it is a real error or an expected configuration to determin whether to ignore it or not.*/
        msg = "update mounted node info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        msg += ", ignore it...";
        log_warn(msg);
       // return; // simply ignore it since there maybe no local file system configured
    } 
    err = GPFSHandler::getPollHandler()->updateVfsStatsInfo(clusterInfo); // to get vfs info
    if(err != M_OK)
    {
        msg = "update vfs info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    } 
    err = GPFSHandler::getPollHandler()->updateThreadUtilInfo(clusterInfo); // to get thread util info
    if(err != M_OK)
    {
        msg = "update thread util info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }      
    err = GPFSHandler::getPollHandler()->updateIocStatsInfo(clusterInfo); // to get ioc statics info
    if(err != M_OK)
    {
        msg = "update ioc statics info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }      
    err = GPFSHandler::getPollHandler()->updateCacheStatsInfo(clusterInfo); // to get cache statics info
    if(err != M_OK)
    {
        msg = "update cache statics info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }  
    err = GPFSHandler::getPollHandler()->updatePCacheStatsInfo(clusterInfo); // to get pcache statics info
    if(err != M_OK)
    {
        msg = "update pcache statics info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    } 
    err = GPFSHandler::getPollHandler()->updateFilesystemManagerInfo(clusterInfo);// update fs manager
    if(err != M_OK)
    {
        msg = "update file system manager failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }    
    err = GPFSHandler::getPollHandler()->updatePolicyInfo(clusterInfo); // to get policy info
    if(err != M_OK)
    {
        msg = "update policy info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    } 
    err = GPFSHandler::getPollHandler()->updateFilesystemConfigInfo(clusterInfo);// update fs config
    if(err != M_OK)
    {
        msg = "update file system config failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }   
   
    ClusterStatus* clusterStatus = new ClusterStatus();
    err = GPFSHandler::getPollHandler()->getClusterStatus(clusterStatus); 
    if(err != M_OK)
    {
        msg = "get cluster status failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    } 
    
    clusterName = clusterInfo->getName();
    int i = 0;
    string clusterid = clusterInfo->getId();
    nFSs = clusterInfo->getNumFilesystems();
    //log fs one by one
    for( i = 0 ; i < nFSs; i++)
    {
        fsInfo = clusterInfo->getFilesystem(i);
           
        if (fsInfo == NULL)
        {
            msg = "NULL filesystem ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&i);
            log_error(msg);
            continue;
        }
        fsName = fsInfo->getName(); 
        err = GPFSHandler::getPollHandler()->updateStoragePoolInfo(clusterInfo, (char*)fsName.c_str());
        if(err != M_OK)
        {
            msg  = "update storage pool info for file system: ";
            msg += fsName;
            msg += " failed with ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
            log_warn(msg);
            continue;
        }  
        msg = "Refresh file system: ";
        msg += fsName;
        log_debug(msg);
        ret = refreshFS(fsInfo, clusterid);
        if(ret != TEAL_SUCCESS)
        {
            msg  = "Refresh file system: ";
            msg += fsName;
            msg += " failed with ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
            log_error(msg);
        }  
          
        nPools = fsInfo->getNumStoragePools();
        int j = 0;   
        //log stg one by one
        for(; j < nPools; j++ )
        {         
            stgInfo = fsInfo->getStoragePool(j);
            if(stgInfo == NULL)
            {
                msg  = "ERR stgInfo for storage pool: ";
                msg += Utils::int_to_char(tmp,10,(unsigned int*)&j);
                msg += " in (fs: ";
                msg += fsName;
                msg += ") is NULL";
                log_error(msg);
                continue;
            }
            stgName = stgInfo->getName();
            err = GPFSHandler::getPollHandler()->updateDiskInfo(clusterInfo, (char*)fsName.c_str(), (char*)stgName.c_str(),1);
            if(err != M_OK)
            {
                msg  = "update disk info in (file system: ";
                msg += fsName;
                msg += ", storage pool: ";
                msg += stgName;
                msg += ") failed with ";
                msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
                log_warn(msg);
                continue;
            }  
            msg  = "Refresh storage pool: ";
            msg += stgName;
            msg += " in (fs: ";
            msg += fsName;
            msg += ")";
            log_debug(msg); 
            ret = refreshStgPool(stgInfo, clusterid, fsName);
            if(ret != TEAL_SUCCESS)
            {
                msg  = "Refresh storage pool: ";
                msg += stgName;
                msg += " in (fs: ";
                msg += fsName;
                msg += ") failed with ";
                msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                log_error(msg);
            }  

            int k = 0;
            nDisks = stgInfo->getNumDisks();
            //log disk one by one        
            for(; k < nDisks ; k++ )
            {
                diskInfo = stgInfo->getDisk(k);
                if(diskInfo == NULL)
                {
                    msg  = "diskInfo for disk: ";
                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&k);
                    msg += " in (storage pool: ";
                    msg += stgName;
                    msg += ", fs: ";
                    msg += fsName;
                    msg += ") is NULL";
                    log_error(msg);
                    continue;
                }
                diskName = diskInfo->getName();
                msg  = "Refresh disk: ";
                msg += diskName;
                msg += " in (storage pool: ";
                msg += stgName;
                msg += ", fs: ";
                msg += fsName;
                msg += ")";
                log_debug(msg);
                ret = refreshDisk(diskInfo, clusterid);
                if(ret != TEAL_SUCCESS)
                {
                    msg  = "Refresh disk: ";
                    msg += diskName;
                    msg += " in (storage pool: ";
                    msg += stgName;
                    msg += ", fs: ";
                    msg += fsName;
                    msg += ") failed with ";
                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                    log_error(msg);
                }   
            }//end of refresh disks
        }//end of refresh stgpool
        /* core dump in GPFS 3.4, only effective in 3.5
        err = GPFSHandler::getPollHandler()->getFileSets((char*)fsName.c_str(), &fileSetList);
        if(err != M_OK)
        {
            msg  = "update fileset info in (fs: ";
            msg += fsName;
            msg += ") failed with ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
            log_error(msg);
            nFsets = 0;
            fileSetList = NULL;
            continue;  
         } //at first time to get nFsets but will not return M_OK
        */
        err = GPFSHandler::getPollHandler()->getFileSets1((char*)fsName.c_str(), fileSetList, &nFsets);
        if(nFsets <= 0)
        {
            msg  = "no fileset found in (fs: ";
            msg += fsName;
            msg += ")";
            log_warn(msg);
            nFsets = 0;
            fileSetList = NULL;
            continue;  
        }
        fileSetList = new FileSet[nFsets];

        err = GPFSHandler::getPollHandler()->getFileSets1((char*)fsName.c_str(), fileSetList, &nFsets);
        if(err != M_OK)
        {
            msg  = "update fileset info in (fs: ";
            msg += fsName;
            msg += ") failed with ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
            log_warn(msg);
            nFsets = 0;
            fileSetList = NULL;
            continue;  
        } 

        int l = 0;
        
        //log fileset one by one
        for(; l < nFsets; l++ )
        {         
            fsetInfo = &fileSetList[l];
            if(fsetInfo == NULL)
            {
                msg  = "fsetInfo for fset: ";
                msg += Utils::int_to_char(tmp,10,(unsigned int*)&i);
                msg += " in (fs: ";
                msg += fsName;
                msg += ") is NULL";
                log_error(msg);
                continue;
            }
            fsetName = fsetInfo->getName();
            msg  = "Refresh fileset: ";
            msg += fsetName;
            msg += " in (fs: ";
            msg += fsName;
            msg += ")";
            log_debug(msg);
            ret = refreshFset(fsetInfo, clusterid);
            if(ret != TEAL_SUCCESS)
            {
                msg  = "Refresh file set: ";
                msg += fsetName;
                msg += " in (fs: ";
                msg += fsName;
                msg += ") failed with";
                msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                log_error(msg);
            }  
        }//end of refresh fset
        if(fileSetList) 
        {
            delete []fileSetList;
            fileSetList = NULL;
            nFsets = 0;
            fsetInfo = NULL;
        }  
    }//end of refresh fs    
    
    nNodes = clusterInfo->getNumNodes();
    // to get disk access info, place this here to update num_access_disk in nodeinfo and need to invoke updateStoragePool() prior to this API
    err = GPFSHandler::getPollHandler()->updateDiskAccessInfo(clusterInfo); 
    if(err != M_OK)
    {
        msg = "update disk access info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_warn(msg);
        /* Simply ignore this error to continue....
        log_error(msg);
        return;
        */
    }

    NodeInfo* nodeInfo = NULL;
    //log node one by one
    for( i = 0 ; i < nNodes; i++)
    {        
        nodeInfo = clusterInfo->getNode(i);
            
        if (nodeInfo == NULL)
        {
            msg = "nodeInfo for node ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&i);
            msg += "is NULL";
            log_error(msg);
            continue;
        }
        nodeName = nodeInfo->getName();
        msg = "Refresh node: ";
        msg += nodeName;
        log_debug(msg);
        ret = refreshNode(nodeInfo, clusterid);
        if(ret != TEAL_SUCCESS)
        {
            msg = "Refresh node: ";
            msg += nodeName;
            msg += " failed with ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
            log_error(msg);
            continue;
        }         
    }//end of refresh node
    //refresh free disks here since free disk number/info can only be got after invoking updateDiskInfo() to all fs/stgpool
    err = GPFSHandler::getPollHandler()->updateFreeDiskInfo(clusterInfo);
    if(err != M_OK)
    {   
        msg = "update free disk info failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        msg += ", ignore it...";
        log_warn(msg);
    }
    nDisks = clusterInfo->getNumFreeDisks();
    int k = 0;
    for(; k < nDisks ; k++ )
    {
        diskInfo = clusterInfo->getFreeDisk(k);
        if(diskInfo == NULL)
        {
            msg  = "diskInfo for free disk: ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&i);
            msg += " is NULL";
            log_error(msg);
            continue;
        }
        diskName = diskInfo->getName();
        int s;
        int nServers = diskInfo->getNumServerItems();
        int nBacks = diskInfo->getNumBackupServerItems();
        string node_name;
        for(s = 0; s < nServers; s++)
        {
            DiskServerInfo *ds = diskInfo->getServer(s);
            node_name += string(ds->getName()) + string(" ");
        }

        for(s = 0; s < nBacks; s++)
        {
            DiskServerInfo *ds = diskInfo->getBackupServer(s);
            node_name += string(ds->getName()) + string(" ");
        }
        msg  = "Refresh free disk: ";
        msg += "(";
        msg += diskName;
        msg += ")";
        log_debug(msg);
        char svrList[NAME_STRING_LEN] = {0};
        strcpy(svrList,node_name.c_str());
        ret = refreshDisk(diskInfo, clusterid, svrList);
        if(ret != TEAL_SUCCESS)
        {
            msg  = "Refresh free disk: ";
            msg += "(";
            msg += diskName;
            msg += ") failed with ";
            msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
            log_error(msg);
        }   
    }//end of refresh free disks
    //refresh cluster here since free disk number/info can only be got after invoking updateDiskInfo() to all fs/stgpool
    msg = "Refresh cluster: ";
    msg += clusterName;
    log_debug(msg);
    ret = refreshCluster(clusterInfo,clusterStatus);
    if(ret != TEAL_SUCCESS)
    {
        msg = "Refresh cluster failed with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&err);
        log_error(msg);
    }

    log_info("##################Start to refresh perseus configuration###################");

    int nRgAllocated = 6; /* number of rg slots allocated in the buffer in advance*/
    char *bufP       = NULL;
    int bufLen       = 0;
    int rc           = 0;
    int nPdisk       = 0;
    int nVdisk       = 0;
    int nRg          = 0;
    int nDa          = 0;
    string pdiskName;
    string vdiskName;
    string rgName;
    string daName;
    
    gpfsRecoveryGroupSdrInfo *rgSdrTableP  = NULL;
    gpfsRecoveryGroupSdrInfo *rgSdrP       = NULL;    
    gpfsRecoveryGroup *rgTableP            = NULL;
    gpfsRecoveryGroup *rgP                 = NULL;
    gpfsRecoveryGroupDeclusteredArray* daP = NULL;        
    gpfsDeclusteredArrayPdisk* pdiskP      = NULL;
    gpfsDeclusteredArrayVdisk* vdiskP      = NULL;
    
    rgSdrTableP = new gpfsRecoveryGroupSdrInfo[nRgAllocated];
    nRg = nRgAllocated;

    /* get initial info from SDR (all RG names) */
    rc = getNsdRAIDSdrInfo(rgSdrTableP, &nRg);
    //  retry if failed with ENOMEM
    if(rc == ENOMEM)
    {
        log_debug("Not enough memory allocated, reallocate...");
        nRgAllocated = nRg > nRgAllocated ? nRg : nRgAllocated;
        delete[] rgSdrTableP;
        rgSdrTableP = NULL;
        rgSdrTableP = new gpfsRecoveryGroupSdrInfo[nRgAllocated];
        nRg = nRgAllocated;
        rc = getNsdRAIDSdrInfo(rgSdrTableP, &nRg);
    }

    if (rc == M_OK)
    {
        if (nRg >= 1)
        {
            rgTableP = new gpfsRecoveryGroup[nRg];
        
            if (rgTableP == NULL)
            {
                msg = "Initial RG table failed with ";
                msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc);
                log_error(msg);
                return;
            }
            for (i = 0, rgSdrP = rgSdrTableP; i < nRg && i < nRgAllocated; i++, rgSdrP++)
            {
                rgP = rgTableP + i;
                
                rgP->updateRgSdrInfo(rgSdrP->getRecoveryGroupName(),rgSdrP->getRecoveryGroupServerList(),rgSdrP->getRecoveryGroupId());
        
                rc = getRecoveryGroupSummary(rgP);  //refresh rg info
                if (rc == 0)
                { 
                    rgName = rgP->getRecoveryGroupName();
                    
                    rc = getRecoveryGroupDeclusteredArrays(rgP); // refresh da info
                    if (rc == 0)
                    {                        
                        int l = 0;
                        int nDa = rgP->getRecoveryGroupDeclusterArrays();
                        bool allDaOK = true; // is all DA ok?
                        for(; l < nDa; l++)
                        {
                            daP = rgP->getDeclusteredArrayP(l);
                            if(daP == NULL)
                            {
                                msg = "da: ";
                                msg += Utils::int_to_char(tmp,10,(unsigned int*)&l);
                                msg +=  "in (rg: ";
                                msg += rgName;
                                msg += ") is NULL";
                                log_error(msg);
                                continue;
                            }
                            daName = daP->getDeclusteredArrayName();
                            msg = "Refresh da: ";
                            msg += daName;
                            msg += " in rg: ";
                            msg += rgName;
                            log_debug(msg);
                            ret = refreshDa(daP, clusterid, rgName);
                            if(ret != TEAL_SUCCESS)
                            {
                                msg = "Refresh declustered array: ";
                                msg += daName;
                                msg += " in (rg: ";
                                msg += rgName;
                                msg += ") failed with ";
                                msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                                log_error(msg);
                            } 
                            int j = 0;
                            int k = 0;
                            nPdisk = daP->getDeclusteredArrayPdisks();
                            nVdisk = daP->getDeclusteredArrayVdisks();
                            for(; j < nPdisk; j++)
                            {
                                pdiskP = daP->getDeclusteredArrayPdiskP(j);
                                if(pdiskP == NULL)
                                {
                                    msg = "pdisk: ";
                                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&j);
                                    msg += " in (rg: ";
                                    msg += rgName;
                                    msg += ", da: ";
                                    msg += daName;
                                    msg += ") is NULL";
                                    log_error(msg);
                                    continue;
                                }
                                pdiskName = pdiskP->getPdiskName();                                 
                                msg = "Refresh pdisk: ";
                                msg += pdiskName;
                                msg += " in (rg: ";
                                msg += rgName;
                                msg += ", da: ";
                                msg += daName;
                                msg += ")";
                                log_debug(msg);
                                ret = refreshPdisk(pdiskP,clusterid,rgName,daName);
                                if(ret != TEAL_SUCCESS)
                                {
                                    msg = "Refresh pdisk: ";
                                    msg += pdiskName;
                                    msg += " in (rg: ";
                                    msg += rgName;
                                    msg += ", da: ";
                                    msg += daName;
                                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                                    log_error(msg);
                                }
                            }
                            for(; k < nVdisk; k++)
                            {
                                vdiskP = daP->getDeclusteredArrayVdiskP(k);
                                if(vdiskP == NULL)
                                {
                                    msg = "vdisk: ";
                                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&k);
                                    msg += " in (rg: ";
                                    msg += rgName;
                                    msg += ", da: ";
                                    msg += daName;
                                    msg += ") is NULL";
                                    log_error(msg);
                                    continue;
                                }
                                vdiskName = vdiskP->getVdiskName();
                                msg = "Refresh vdisk: ";
                                msg += vdiskName;
                                msg += " in (rg: ";
                                msg += rgName;
                                msg += ", da: ";
                                msg += daName;
                                log_debug(msg);
                                ret = refreshVdisk(vdiskP,clusterid,rgName,daName);
                                if(ret != TEAL_SUCCESS)
                                {
                                    msg = "Refresh vdisk: ";
                                    msg += vdiskName;
                                    msg += " in (rg: ";
                                    msg += rgName;
                                    msg += ", da: ";
                                    msg += daName;
                                    msg += ") failed with ";
                                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                                    log_error(msg);
                                }
                            }
                            allDaOK &= strcmp(daP->getDeclusteredNeedsService(),"yes"); // check all DA's status
                        }
                        msg = "Refresh rg: ";
                        msg += rgName;
                        log_debug(msg);  
                        ret = refreshRg(rgP, clusterid,allDaOK);
                        if(ret != TEAL_SUCCESS)
                        {
                            msg = "Refresh recovery group: ";
                            msg += rgName;
                            msg += " failed with ";
                            msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret);
                            log_error(msg);
                        }  
                    }
                    else
                    {
                        msg = "get DA to refresh DA in RG: ";
                        msg += rgName;
                        msg += " failed with ";
                        msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc);
                        log_warn(msg);
                        continue;
                    }
                }
                else
                {
                    msg = "get RG summary to refresh RG: ";
                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&i);
                    msg += " failed with ";
                    msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc);
                    log_warn(msg);
                    continue;
                }
             }
          }
        else
        {
            log_warn("No recovery group found!");
        }

    }
    else if(rc == ENODEV)
    {
        msg = "No perseus configuration..";
        log_info(msg);
    }
    else
    {
        msg = "Failed to getNsdRAIDSdrInfo with ";
        msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc);
        log_warn(msg);
    }
    log_info("########################End of refresh all entities#########################################");
    return;
    
}
Пример #23
0
// Builds a FileSet from textual contents: an empty set is created and the
// result of ParseAndAddEntries(contents) on it is returned by value.
FileSet FileSet::FromContents(const std::string &contents) {
    return FileSet().ParseAndAddEntries(contents);
}
Пример #24
0
/***************************************************************************************
This function touches up inspect search results by rescoring the sequences returned by
inspect. The function produces a new inspect results file with the scores (and delta scores)
replaced.
****************************************************************************************/
void PeptideRankScorer::recalibrate_inspect_delta_scores(char *spectra_file, 
											   char *inspect_res, 
											   char *new_res_file) const
{
	AllScoreModels* allScoreModels = static_cast<AllScoreModels*>(this->allScoreModelsPtr_);
	Config *config = allScoreModels->get_config();

	ifstream org_res(inspect_res);

	if (!  org_res.is_open() || ! org_res.good())
	{
		cout << "Error: couldn't open original inspect results file for reading:" << inspect_res << endl;
		exit(1);
	}

	ofstream new_res(new_res_file);
	if (! new_res.is_open() || ! new_res.good())
	{
			cout << "Error: couldn't open new inspect results file for writing:" << new_res << endl;
		exit(1);
	}

	char line_buff[1024];
	org_res.getline(line_buff,1024);

	bool read_line  = true;
	vector<string> field_names;
	if (line_buff[0] != '#')
	{
		read_line = false;
	}
	else
	{
		string header = string(line_buff);
		split_string(header,field_names);

		int i;
		for (i=0; i<field_names.size(); i++)
			cout << i << "\t" << field_names[i] << endl;
	}


	vector<ScanCandidateSet> cand_sets;
	vector<int> scan_mapping;
	cand_sets.clear();
	scan_mapping.resize(100000,-1);
	
	while (! org_res.eof())
	{
		vector<string> fields;

		if (read_line)
		{
			org_res.getline(line_buff,1024);
			if (org_res.gcount() < 5)
				continue;
		}
		else
		{
			read_line = true;
		}

		split_string(line_buff,fields);
		InspectResultsLine res;

		res.parse_from_fields(config,fields);

		if (cand_sets.size()==0 || ! cand_sets[cand_sets.size()-1].add_new_line(res))
		{
			ScanCandidateSet new_set;
			new_set.add_new_line(res);
			
			if (new_set.scan>=scan_mapping.size())
				scan_mapping.resize(2*scan_mapping.size(),-1);

			scan_mapping[new_set.scan]=cand_sets.size();
			cand_sets.push_back(new_set);
		}
	}
	org_res.close();

	cout << "Read results for " << cand_sets.size() << " scans..." << endl;

	FileManager fm;
	FileSet     fs;
	fm.init_from_file(config,spectra_file);
	fs.select_all_files(fm);
	const vector<SingleSpectrumFile *>& all_ssfs = fs.get_ssf_pointers();

	cout << "Read " <<  all_ssfs.size() << " spectra headers..." << endl;

	BasicSpecReader bsr;
	QCPeak *peaks = new QCPeak[5000];

	vector<bool> spectrum_indicators;
	spectrum_indicators.resize(cand_sets.size(),false);

	int num_found =0;
	int i;
	for (i=0; i<all_ssfs.size(); i++)
	{
		SingleSpectrumFile *ssf = all_ssfs[i];
		
		const int scan_number = ssf->get_scan();
		if (scan_mapping[scan_number]<0)
			continue;

		const int num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);

		spectrum_indicators[scan_mapping[scan_number]]=true;
		num_found++;

		ScanCandidateSet& cand_set = cand_sets[scan_mapping[scan_number]];
		
		cand_set.recalbirate_scores(config);

		cand_set.output_to_stream(new_res,10);
	}

	if (num_found<cand_sets.size())
	{
		cout << "Warning: found only " << num_found << "/" << cand_sets.size() << " of the scans scored by InsPecT!" << endl;
	}
	else
	{
		cout << "All scored scans found in spectrum file." << endl;
	}


	delete [] peaks;
}
Пример #25
0
/******************************************************************************
Train PMC models from positive example files
*******************************************************************************/
void PMCSQS_Scorer::train_pmc_rank_models(Config *config, const FileManager& fm, 
										  int sel_charge, bool overwrite)
{	
	const bool sample_diagnostic = false;
	const vector<int>& spectra_counts = fm.get_spectra_counts();
	
	max_model_charge=0;

	int charge;
	for (charge=1; charge<spectra_counts.size(); charge++)
	{
		if (spectra_counts[charge]>=MIN_SPECTRA_FOR_PMCSQS_MODEL)
			max_model_charge=charge;
	}

	const int max_to_read_per_file = 40000;
	
	vector<string> real_names;
	init_PMC_feature_names(real_names);


	// try and read existing pmc model, otherwise init a new one
	string pmc_path = config->get_resource_dir() + "/" + config->get_model_name() + "_PMCR.txt";
	ifstream model_stream(pmc_path.c_str());
	if (model_stream.is_open() && model_stream.good())
	{
		model_stream.close();
		string pmcr_name = config->get_model_name() + "_PMCR.txt";
		const char *path = pmc_path.c_str();
		this->read_pmc_rank_models(config,(char *)pmcr_name.c_str());
	}
	else
	{
		set_pmc_mass_thresholds();
	
		this->set_frag_pair_sum_offset(MASS_PROTON); // b+y - PM+19
		this->set_bin_increment(0.1);
		pmc_rank_models.resize(pmc_rank_mass_thresholds.size());
		pmc_charge_mz_biases.resize(pmc_rank_mass_thresholds.size());
	}
	
	const double prop_train = 0.5;


	// It is assumed that the mass thresholds were set according to the training data
	// (this is done manually with values encoded in the set_mass_threhsolds function)
	for (charge=1; charge<=max_model_charge; charge++)
	{
		if (sel_charge>0 && charge != sel_charge)
			continue;

		const int num_sizes = pmc_rank_mass_thresholds[charge].size();
		pmc_rank_models[charge].resize(num_sizes+1,NULL);
		pmc_charge_mz_biases[charge].resize(num_sizes+1,0);

		
		int size_idx;
		for (size_idx=0; size_idx<=num_sizes; size_idx++)
		{
			if (pmc_rank_models[charge][size_idx] && ! overwrite)
				continue;

			vector<SingleSpectrumFile *> test_ssfs;
			BasicSpecReader bsr;
			static QCPeak peaks[5000];
			RankBoostDataset train_ds, test_ds, pos_ds, neg_ds;

			mass_t min_mass =0;
			mass_t max_mass = POS_INF;

			if (size_idx>0)
				min_mass = pmc_rank_mass_thresholds[charge][size_idx-1];

			if (size_idx<num_sizes)
				max_mass = pmc_rank_mass_thresholds[charge][size_idx];

			// these ranges are given according to pm_with_19
			// so files should be selected through select_files and not
			// select_file_in_mz_range
			FileSet fs;		
			fs.select_files(fm,min_mass,max_mass,-1,-1,charge);

			if (fs.get_total_spectra()<500)
				continue;

			
			int num_groups_in_train=0;
			int num_groups_in_test=0;

			cout << "TRAINING charge " << charge << " size " << size_idx << "  (" <<
				min_mass << "-" << max_mass << ")" << endl;

			fs.randomly_reduce_ssfs(max_to_read_per_file);
			const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();
			const int num_samples = all_ssf.size();
			
			// first find the bias in number of bins between the true m/z bin and
			// the optimal m/z bin
			vector<bool> skipped_idxs;
			skipped_idxs.resize(num_samples,false);
			int skipped_bad_mz=0;
			mass_t total_bias=0;
			int i;
			for (i=0; i<num_samples; i++)
			{
				SingleSpectrumFile* ssf = all_ssf[i];
				BasicSpectrum bs;
			
				bs.num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
				bs.peaks = peaks;
				bs.ssf = ssf;

				ssf->peptide.calc_mass(config);
				
				const mass_t true_mz = (ssf->peptide.get_mass()+MASS_H2O+(mass_t)charge)/(mass_t)charge;

				if (fabs(true_mz - bs.ssf->m_over_z)>2.5)
				{
					//cout << setprecision(2) << true_mz << " <---> " << bs.ssf->m_over_z << " skipping" << endl;
					skipped_bad_mz++;
					skipped_idxs[i]=true;
					continue;
				} 

				init_for_current_spec(config,bs);
				calculate_curr_spec_pmc_values(bs, bin_increment);

				// find the true_mz_bin_idx
				
				const vector<PMCRankStats>& pmc_stats = curr_spec_rank_pmc_tables[charge];
				int true_mz_bin_idx=0;
				while (true_mz_bin_idx<pmc_stats.size() && pmc_stats[true_mz_bin_idx].m_over_z<true_mz)
					true_mz_bin_idx++;

				if (true_mz_bin_idx == pmc_stats.size())
					true_mz_bin_idx--;

				if (true_mz_bin_idx>0 && pmc_stats[true_mz_bin_idx].m_over_z-true_mz>true_mz-pmc_stats[true_mz_bin_idx-1].m_over_z)
					true_mz_bin_idx--;

				int opt_bin_idx = get_optimal_bin(true_mz_bin_idx, charge);

				if (opt_bin_idx <=0 || opt_bin_idx == pmc_stats.size()-1)
				{
					skipped_bad_mz++;
					skipped_idxs[i]=true;
					continue;
				}

				total_bias += (pmc_stats[opt_bin_idx].m_over_z - pmc_stats[true_mz_bin_idx].m_over_z);

				if (fabs(pmc_stats[opt_bin_idx].m_over_z - pmc_stats[true_mz_bin_idx].m_over_z)>4.0)
				{
					cout << "opt bin: " << opt_bin_idx << " (" << pmc_stats[opt_bin_idx].m_over_z << ")  ";
					cout << "tru bin: " << true_mz_bin_idx << " ("<< pmc_stats[true_mz_bin_idx].m_over_z << ")" << endl;
				}
			} 

			mass_t mz_bias = total_bias / (mass_t)(num_samples-skipped_bad_mz);
			pmc_charge_mz_biases[charge][size_idx]=mz_bias;

			cout << "m/z bias: " << setprecision(4) << mz_bias << endl;
			cout << "skipped " << skipped_bad_mz << "/" << num_samples <<
				"  because of m/z more than 2.5 away from observed..." << endl; 

		//	pmc_charge_mz_biases[charge][size_idx] = 0;

			for (i=0; i<num_samples; i++)
			{
				if (skipped_idxs[i])
					continue;

				SingleSpectrumFile* ssf = all_ssf[i];
				BasicSpectrum bs;
			
				bs.num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
				bs.peaks = peaks;
				bs.ssf = ssf;
				const mass_t true_mz = (ssf->peptide.get_mass()+MASS_H2O+(mass_t)charge)/(mass_t)charge;

				init_for_current_spec(config,bs);
				calculate_curr_spec_pmc_values(bs, bin_increment);

				// find the true_mz_bin_idx
				
				const vector<PMCRankStats>& pmc_stats = curr_spec_rank_pmc_tables[charge];
				int true_mz_bin_idx=0;
				while (true_mz_bin_idx<pmc_stats.size() && pmc_stats[true_mz_bin_idx].m_over_z<true_mz)
					true_mz_bin_idx++;

				if (true_mz_bin_idx == pmc_stats.size())
					true_mz_bin_idx--;

				if (true_mz_bin_idx>0 && pmc_stats[true_mz_bin_idx].m_over_z-true_mz>true_mz-pmc_stats[true_mz_bin_idx-1].m_over_z)
					true_mz_bin_idx--;

				int opt_bin_idx = get_optimal_bin(true_mz_bin_idx, charge);

				
				static vector<RankBoostSample> spec_samples;
				fill_RankBoost_smaples_with_PMC(bs, charge, spec_samples);

				// select samples and add them to pmc_ds
				int good_idx;
				vector<int> bad_idxs;
				select_training_sample_idxs(charge,spec_samples,bs,good_idx,bad_idxs);

				const bool ind_add_to_train = (my_random()<prop_train);
				int group_idx;
				
				if (ind_add_to_train)
				{
					group_idx= num_groups_in_train++;	
				}
				else
				{
					group_idx= num_groups_in_test++;
					test_ssfs.push_back(ssf);
				}
				
				
				RankBoostDataset& ds = (ind_add_to_train ? train_ds : test_ds);

				const int pos_index  = ds.get_num_samples();
				spec_samples[good_idx].group_idx = group_idx;
				spec_samples[good_idx].rank_in_group=0;

				ds.add_sample(spec_samples[good_idx]);
				if (sample_diagnostic)
					pos_ds.add_sample(spec_samples[good_idx]);

				int j;
				for (j=0; j<bad_idxs.size(); j++)
				{
					const int bad_idx = bad_idxs[j];
					if (bad_idx < 0 || bad_idx>= spec_samples.size())
						continue;
		
					spec_samples[bad_idx].group_idx=group_idx;
					spec_samples[bad_idx].rank_in_group=1;

					ds.add_to_phi_vector(ds.get_num_samples(),pos_index);
					ds.add_sample(spec_samples[bad_idx]);

					if (sample_diagnostic)
						neg_ds.add_sample(spec_samples[bad_idx]);
				}						   
			}

			train_ds.set_num_groups(num_groups_in_train);
			test_ds.set_num_groups(num_groups_in_test);
			
			train_ds.compute_total_phi_weight();
			train_ds.initialize_potenital_lists();
			train_ds.initialzie_real_feature_table(real_names.size());

			test_ds.compute_total_phi_weight();

			if (pmc_rank_models[charge][size_idx])
				delete pmc_rank_models[charge][size_idx];
			
			pmc_rank_models[charge][size_idx] = new RankBoostModel;
		

			RankBoostModel* boost = pmc_rank_models[charge][size_idx];

			vector<string> empty;
			empty.clear();
			boost->init_rankboost_model_feature_names(empty,real_names);
			boost->init_rankboost_model_for_training(train_ds,100,25);

			train_ds.initialize_real_vote_lists(*boost);

			if (sample_diagnostic)
			{
				boost->summarize_features_pos_neg(pos_ds.get_samples(),neg_ds.get_samples());
			}
			else
				boost->summarize_features(train_ds.get_samples());

			boost->train_rankboost_model(train_ds,4000,NULL,&test_ds);
			
			boost->ouput_ranked_feature_list();

		//	output_pmc_rank_results(fm,charge,test_ssfs);

		//	exit(0);

			ind_initialized_pmcr = true;
		//	string path;
		//	path = config->get_resource_dir() + "/" + config->get_model_name() + "_PMCRtt.txt";
		//	this->write_pmc_rank_models(path.c_str());
			
		}
	}

	string path;
	path = config->get_resource_dir() + "/" + config->get_model_name() + "_PMCR.txt";
	this->write_pmc_rank_models(path.c_str());
	ind_initialized_pmcr = true;
}
Пример #26
0
// Handle a find/replace command issued from the find dialog.
//
// The current contents of the find, replace and start-directory combo boxes
// are read and stored via StoreComboText, after which the command is routed
// to one of three places:
//   - multi-file expander open: either a FindAll worker thread is created
//     (batch checkbox set) or the file list is collected into mMultiFiles
//     and FindNext / ReplaceAll is run on it;
//   - expander closed, batch checkbox set: FindAll on the first text
//     document, hits shown in a new MMessageWindow;
//   - otherwise: the command is forwarded to the first text document.
//
// inCommand	the command to execute; in the multi-file, non-batch path only
//				cmd_FindNext and cmd_ReplaceAll are acted upon.
//
// The THROW in the directory case fires when the start directory does not
// exist or is not a directory.
void MFindDialog::DoFindCommand(
	uint32		inCommand)
{
	// TODO: check the regular expression first?

	// In multi mode with batch checked, refuse (with a warning sound) as long
	// as mFindAllThread is still set.
	if (mMultiMode and IsChecked(kBatchCheckboxID) and mFindAllThread != nil)
	{
		PlaySound("warning");
		return;
	}

	mInSelection = IsChecked(kInSelectionCheckboxID);

	// Read the three combo boxes and store their texts.
	string findText;
	GetText(kFindComboboxID, findText);
	StoreComboText(kFindComboboxID, findText, mFindStrings);

	string replaceText;
	GetText(kReplaceComboboxID, replaceText);
	StoreComboText(kReplaceComboboxID, replaceText, mReplaceStrings);

	string startDir;
	GetText(kStartDirComboboxID, startDir);
	StoreComboText(kStartDirComboboxID, startDir, mStartDirectories);

	MTextDocument* frontDoc = MTextDocument::GetFirstTextDocument();

	// --- Single-document handling (multi-file expander is closed) ---
	if (not IsExpanded(kMultiFileExpanderID))
	{
		if (IsChecked(kBatchCheckboxID))
		{
			if (frontDoc != nil)
			{
				MMessageList hits;

				frontDoc->FindAll(findText,
					IsChecked(kIgnoreCaseCheckboxID),
					IsChecked(kRegexCheckboxID),
					IsChecked(kInSelectionCheckboxID),
					hits);

				// Only open a result window when something was found.
				if (hits.GetCount())
				{
					MMessageWindow* resultWindow = new MMessageWindow("");
					resultWindow->SetMessages(
						FormatString("Found ^0 hits for ^1",
							hits.GetCount(), mFindStrings.front()),
						hits);
				}
			}
		}
		else if (frontDoc != nil)
			frontDoc->HandleFindDialogCommand(inCommand);

		return;
	}

	// --- Multi-file handling ---
	fs::path searchRoot;
	string nameFilter;
	MMultiMethod searchMethod = eMMDirectory;

	bool descend = IsChecked(kRecursiveCheckboxID);
	bool onlyTextFiles = IsChecked(kTextFilesOnlyCheckboxID);

	// The filter stays empty unless filtering is enabled.
	if (IsChecked(kEnableFilterCheckboxID))
		GetText(kNameFilterEditboxID, nameFilter);

	switch (GetValue(kMethodPopupID))
	{
		case kMethodDirectory:
			searchRoot = startDir;

			if (not (exists(searchRoot) and is_directory(searchRoot)))
				THROW(("Start directory does not exist or is not a directory"));

			searchMethod = eMMDirectory;
			break;

		case kMethodIncludeFiles:
			searchMethod = eMMIncludes;
			break;

		case kMethodOpenWindows:
			searchMethod = eMMOpenWindows;
			break;
	}

	if (IsChecked(kBatchCheckboxID))
	{
		// Batch mode: run FindAll on a newly created thread with the
		// options gathered above.
		const bool ignoreCase = IsChecked(kIgnoreCaseCheckboxID);
		const bool useRegex = IsChecked(kRegexCheckboxID);

		mFindAllThread = new boost::thread(
			boost::bind(&MFindDialog::FindAll, this, findText,
				ignoreCase, useRegex,
				searchMethod, searchRoot, descend, onlyTextFiles, nameFilter));
		return;
	}

	// Interactive mode: rebuild the list of files to visit.
	mMultiFiles.clear();

	FileSet candidates;
	GetFilesForFindAll(searchMethod, searchRoot,
		descend, onlyTextFiles, nameFilter, candidates);

	copy(candidates.begin(), candidates.end(), back_inserter(mMultiFiles));

	if (inCommand == cmd_FindNext)
		FindNext();
	else if (inCommand == cmd_ReplaceAll)
	{
		// Any other alert result leaves the files untouched.
		switch (DisplayAlert("replace-all-alert"))
		{
			case kReplaceAll_Save:
				ReplaceAll(true);
				break;

			case kReplaceAll_LeaveOpen:
				ReplaceAll(false);
				break;
		}
	}
}