Пример #1
0
/*private*/
/**
 * Builds one edge sequence for every connected subgraph reported by
 * ConnectedSubgraphFinder for `graph`.
 *
 * @return a heap-allocated Sequences holding one list per subgraph, or
 *         nullptr as soon as hasSequence() rejects a subgraph. A non-null
 *         result is allocated with `new` here and is not released by this
 *         function.
 *
 * Every Subgraph handed back by getConnectedSubgraphs() is deleted before
 * returning, on both the success and the abort path.
 */
LineSequencer::Sequences*
LineSequencer::findSequences()
{
	Sequences *sequences = new Sequences();
	planargraph::algorithm::ConnectedSubgraphFinder csFinder(graph);
	vector<planargraph::Subgraph*> subgraphs;
	csFinder.getConnectedSubgraphs(subgraphs);
	for (vector<planargraph::Subgraph*>::const_iterator
		it=subgraphs.begin(), endIt=subgraphs.end();
		it!=endIt;
		++it )
	{
		planargraph::Subgraph* subgraph = *it;
		if (!hasSequence(*subgraph)) {
			// if any subgraph cannot be sequenced, abort.
			// Release the failing subgraph AND every subgraph not yet
			// visited; deleting only the current one would leak the rest.
			for (vector<planargraph::Subgraph*>::const_iterator rest=it;
				rest!=endIt;
				++rest )
			{
				delete *rest;
			}
			delAll(*sequences);
			delete sequences;
			return nullptr;
		}
		planargraph::DirectedEdge::NonConstList* seq=findSequence(*subgraph);
		sequences->push_back(seq);
		delete subgraph;
	}
	return sequences;
}
Пример #2
0
// Reads the initial sequence identified by `name` and cuts it into
// consecutive sub-sequences of sequence.size() / 20 observations each
// (the last one holds whatever remains).
// Every non-empty sub-sequence is terminated by an END observation so that
// the generated HMM has a terminal state.
//
// NOTE(review): the original comment spoke of sub-sequences "of length 20",
// but the code uses size() / 20 as the chunk length -- confirm which one is
// intended.
// NOTE(review): when the input holds fewer than 20 observations the chunk
// length is 0, so the first (still empty) sub-sequence is closed immediately
// and contains only END; all observations land in the second sub-sequence.
// This behaviour is preserved as-is.
Sequences build_initial_sequences(const std::string& name)
{
    Sequence sequence = read_initial_sequence(name, false);
    Sequences sequences;
    unsigned int length = sequence.size() / 20;
    Sequence::const_iterator it = sequence.begin();
    unsigned int added = 0;
    unsigned int index = 0;

    sequences.push_back(Sequence());
    while (it != sequence.end()) {
        if (added == length) {
            // Current chunk is full: terminate it and open the next one.
            sequences[index].push_back(Observation(0, END));
            ++index;
            added = 0;
            sequences.push_back(Sequence());
        }
        sequences[index].push_back(*it);
        ++added;
        ++it;
    }
    // The loop only appends END when it moves on to a following chunk, so
    // the last chunk is still unterminated here whenever the input was
    // non-empty -- including when size() is an exact multiple of 20, which
    // the former `size() % 20 != 0` test missed.
    if (sequence.size() != 0) {
        sequences[index].push_back(Observation(0, END));
    }
    return sequences;
}
Пример #3
0
// Command-line entry point.
//
// Arguments: <fastafile> <matrix> <peptideLength> <outfile>
//   1. loads the sequences from <fastafile>,
//   2. generates every substring of length <peptideLength>,
//   3. translates each substring through the Matrix built from <matrix>
//      and adds the resulting index sequence to a Trie,
//   4. converts the Trie into a TrieArray and saves it through a Boost
//      text archive written to <outfile>.
// Returns -1 (after printing the usage line) when arguments are missing.
int main(int argc, char** argv)
{
	// Four arguments are read below (argv[1]..argv[4]), so argc must be at
	// least 5; the former `argc < 4` test let argv[4] be a null pointer.
	if (argc < 5)
	{
		std::cout << "Usage: get_TrieArray <fastafile> <matrix> <peptideLength> <outfile> " << std::endl;
		return -1;
	}

	string fastafile(argv[1]);
	string matrix(argv[2]);
	cout << "test" << endl;
	int peptideLength(atoi(argv[3]));
	string outname(argv[4]);


	//std::cout << fastafile << "\t" << outname << std::endl;

//----------------------------------------------------------------------------------------
	cout << "Reading FASTA file..." << endl;

	Sequences s(fastafile);
	cout << "Read " << s.size() << " sequences." << endl;

	cout << "Generating peptides..." << endl;
	Sequences ninemers;
	generateAllSubstrings(ninemers, s, peptideLength);
	cout << "Generated " << ninemers.size() << " peptides." << endl;

	// The input sequences are not used past this point.
	s.clear();


	//Matrix m("/abi-projects/dist2self/matrices/BLOSUM45_distance_normal.dat"); cout << "Initializing trie. " << endl; Trie t;
	Matrix m(matrix);
	cout << "Initializing trie. " << endl;
	Trie t;
	Matrix::IndexSequence indices;
	for (size_t i = 0; i < ninemers.size(); ++i) {
		// Translate the peptide via the matrix, then insert it into the trie.
		m.translate(ninemers[i], indices);
		t.add(indices);
	}
	t.dump();


	cout << "Converting to trie array." << endl;
	TrieArray ta(t, peptideLength);

	cout << "Done." << endl;

//	std::ofstream ofs("test.trie");
	std::ofstream ofs(outname.c_str());
	boost::archive::text_oarchive oa(ofs);
	// NOTE(review): second argument is presumably an archive version number -- confirm.
	ta.save(oa,1);


}
Пример #4
0
// fonction de sauvegarde de la liste des sous-séquences dans les
// fichiers .input
void save_sequences(const std::string& name, const Sequences& sequences)
{
    std::ofstream f((boost::format("seq-%1%.input") %
                         name).str().c_str());

    for (Sequences::const_iterator it = sequences.begin();
         it != sequences.end(); ++it) {
        for (Sequence::const_iterator it2 = it->begin();
             it2 != it->end(); ++it2) {
            f << it2->first << "|" << it2->second << " ";
        }
        f << std::endl;
    }
    f.close();
}
Пример #5
0
// Appends to `substrings` every window of `len` consecutive characters
// found in each element of `s`, scanning the elements in order and the
// start positions from left to right. Elements shorter than `len`
// contribute nothing.
void generateAllSubstrings(Sequences& substrings, const Sequences& s, size_t len)
{
	const int window = static_cast<int>(len);

	for (size_t seqIdx = 0; seqIdx < s.size(); ++seqIdx)
	{
		// Signed arithmetic on purpose: the bound is negative when the
		// element is shorter than the window, which skips the inner loop.
		const int lastStart = static_cast<int>(s[seqIdx].size()) - window;

		for (int start = 0; start <= lastStart; ++start)
		{
			string piece(&(s[seqIdx][start]), len);
			substrings.push_back(piece);

			// Sanity check on the extracted window length.
			if (piece.size() != len)
			{
				cout << "generateAllSubstrings size: " << piece.size() << endl;
			}
		}
	}
}
Пример #6
0
// Command-line entry point.
//
// Arguments: <fastafile> <outfile>
//   1. loads the sequences from <fastafile>,
//   2. generates every substring of length 9,
//   3. writes them to <outfile>, each preceded by a "> <index>" header line.
// Returns -1 (after printing the usage line) when arguments are missing.
int main(int argc, char** argv)
{
	// Two arguments are read below (argv[1] and argv[2]), so argc must be at
	// least 3; the former `argc < 2` test let argv[2] be a null pointer.
	if (argc < 3)
	{
		std::cout << "Usage: get_TrieArray <fastafile> <outfile> " << std::endl;
		return -1;
	}

	ofstream myfile;
	string fastafile(argv[1]);
	const char* outname(argv[2]);

	myfile.open(outname);

	std::cout << fastafile << "\t" << outname << std::endl;

//----------------------------------------------------------------------------------------
	cout << "Reading FASTA file..." << endl;

	Sequences s(fastafile);
	cout << "Read " << s.size() << " sequences." << endl;

	cout << "Generating ninemers..." << endl;
	Sequences ninemers;
	generateAllSubstrings(ninemers, s, 9);
	cout << "Generated " << ninemers.size() << " ninemers." << endl;

	// The input sequences are not used past this point.
	s.clear();


	// One record per ninemer: a "> <index>" header line, then the ninemer.
	for (size_t i = 0; i < ninemers.size(); ++i) {
		myfile << "> " << i << endl;
		myfile << ninemers[i] << endl;
	}

	myfile.close();

	cout << "Done." << endl;


}