Example #1
	void Sentence::AddSubSentence( const string str )
	{
		//wrap the new sub-sentence in a shared_ptr and pair it with its source string
		shared_ptr<SubSentence> subsentence(new SubSentence(str));
		SubSentenceInfo subInfo(str,subsentence);

		_subInfos.push_back(subInfo);
	}
Example #2
	void Sentence::AddSubSentence( const std::vector<shared_ptr<DataCollection::Character>> vec )
	{
		//build the sub-sentence from the character sequence and index it by its string form
		shared_ptr<SubSentence> subsentence(new SubSentence(vec));
		SubSentenceInfo subInfo(LanguageFunc::ConvertCharacterToString(vec),subsentence);

		_subInfos.push_back(subInfo);
	}
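Both overloads follow the same pattern: heap-allocate the SubSentence, hand ownership to a shared_ptr, and store it in _subInfos next to the string it was built from. A minimal self-contained sketch of that pattern (the Item, ItemInfo, and Container names below are hypothetical stand-ins, not the original classes; std::make_shared is used as the idiomatic alternative to a raw new):

	#include <memory>
	#include <string>
	#include <vector>

	struct Item {};                  //stand-in for SubSentence

	struct ItemInfo {                //stand-in for SubSentenceInfo
		std::string key;
		std::shared_ptr<Item> item;
	};

	class Container {
	public:
		void Add(const std::string& key)
		{
			//make_shared allocates the object and its control block in one step
			std::shared_ptr<Item> item = std::make_shared<Item>();
			_infos.push_back(ItemInfo{key, item});
		}
	private:
		std::vector<ItemInfo> _infos;
	};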
Example #3
void SslCertificate::generateCertificate()
{
	QStringList arguments;

	// self-signed certificate
	arguments.append("req");
	arguments.append("-x509");
	arguments.append("-nodes");

	// validity duration
	arguments.append("-days");
	arguments.append(kCertificateLifetime);

	// subject information
	arguments.append("-subj");

	QString subInfo(kCertificateSubjectInfo);
	arguments.append(subInfo);

	// private key (note: 1024-bit RSA is considered weak by modern standards)
	arguments.append("-newkey");
	arguments.append("rsa:1024");

	QString sslDirPath = QString("%1%2%3")
		.arg(m_ProfileDir)
		.arg(QDir::separator())
		.arg(kSslDir);

	QDir sslDir(sslDirPath);
	if (!sslDir.exists()) {
		sslDir.mkpath(".");
	}

	QString filename = QString("%1%2%3")
		.arg(sslDirPath)
		.arg(QDir::separator())
		.arg(kCertificateFilename);

	// key output filename
	arguments.append("-keyout");
	arguments.append(filename);

	// certificate output filename
	arguments.append("-out");
	arguments.append(filename);

	if (!runTool(arguments)) {
		return;
	}

	emit info(tr("SSL certificate generated."));

	generateFingerprint(filename);

	emit generateFinished();
}
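runTool is not shown in this example. A minimal sketch of what such a helper might look like, assuming it simply shells out to the openssl binary via QProcess (the method body below is an assumption, not the original implementation):

	bool SslCertificate::runTool(const QStringList& arguments)
	{
		//assumption: the tool is the openssl binary found on the PATH
		QProcess process;
		process.start("openssl", arguments);
		//wait without a timeout; key generation can be slow
		if (!process.waitForFinished(-1)) {
			return false;
		}
		return process.exitStatus() == QProcess::NormalExit
			&& process.exitCode() == 0;
	}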
/*
 * Generates a Bloom filter and writes it to the given filename.
 * Takes a second filename to use as a subtractive filter.
 * Returns the redundancy count (m_redundancy) of the Bloom filter generated from the files.
 * Currently only supports fasta files.
 *
 * Outputs to the fileName path.
 */
size_t BloomFilterGenerator::generate(const string &filename,
		const string &subtractFilter) {

	//the number of hash functions used must be greater than 0
	assert(m_hashNum > 0);

	//the filter size must be greater than the number of expected entries
	assert(m_filterSize > m_expectedEntries);

	//setup bloom filter
	BloomFilter filter(m_filterSize, m_hashNum, m_kmerSize);

	//load the subtractive filter's companion info file: assuming the usual
	//naming convention, drop the two-character extension and append "txt"
	string infoFileName = (subtractFilter).substr(0,
			(subtractFilter).length() - 2) + "txt";
	BloomFilterInfo subInfo(infoFileName);

	//load the subtractive Bloom filter
	BloomFilter filterSub(subInfo.getCalcuatedFilterSize(),
			subInfo.getHashNum(), subInfo.getKmerSize(), subtractFilter);

	if (subInfo.getKmerSize() > m_kmerSize) {
		cerr
				<< "Error: Subtraction filter's k-mer size is larger than output filter's k-mer size."
				<< endl;
		exit(1);
	}

//	//ReadProcessor for subtraction filter
//	ReadsProcessor subProc(subInfo.getKmerSize());

	size_t kmerRemoved = 0;

	//for each file loop over all headers and obtain seq
	//load input file + make filter
	for (boost::unordered_map<string, vector<string> >::iterator i =
			m_fileNamesAndHeaders.begin(); i != m_fileNamesAndHeaders.end(); ++i) {
		//let user know that files are being read
		cerr << "Processing File: " << i->first << endl;
		WindowedFileParser parser(i->first, m_kmerSize);
		for (vector<string>::iterator j = i->second.begin();
				j != i->second.end(); ++j) {
			parser.setLocationByHeader(*j);
			//object to process reads
			//insert elements into filter
			//read fasta file line by line and split using sliding window
			while (parser.notEndOfSeqeunce()) {
				const unsigned char* currentSeq = parser.getNextSeq();
				if (currentSeq != NULL) {
					//allow kmer into filter?
					bool allowKmer = false;

					//Check if kmer or subkmers are located in filter
					if (subInfo.getKmerSize() == m_kmerSize) {
						//if kmer does not exist set allowance to true
						allowKmer = !filterSub.contains(currentSeq);
					} else {
						//TODO make compatible with smaller kmer sizes
						cerr
								<< "ERROR: Must use identical size k-mers in subtractive filter"
								<< endl;
//						uint16_t subSections = m_kmerSize - subInfo.getKmerSize();
//						for (uint16_t i = 0; i <= subSections; ++i) {
//							if (!filterSub.contains(subProc.prepSeq(currentSeq, i)))
//							{
//								//if any sub kmer does not exists set allowance to true
//								allowKmer = true;
//								break;
//							}
//						}
					}

					if (allowKmer) {
						const vector<size_t> &tempHash = multiHash(currentSeq,
								m_hashNum, m_kmerSize);
						if (filter.contains(tempHash)) {
							m_redundancy++;
						} else {
							filter.insert(tempHash);
							m_totalEntries++;
						}
					} else {
						++kmerRemoved;
					}
				}
			}
		}
	}

	cerr << "Total Number of K-mers not added: " << kmerRemoved << endl;

	filter.storeFilter(filename);
	return m_redundancy;
}
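A hedged usage sketch for generate: the constructor arguments are elided because the class's setup is not shown here, and the file names are illustrative only:

	//generator construction (input files, k-mer size, sizing) happens elsewhere
	BloomFilterGenerator gen(/* ... */);
	//writes the filter to "target.bf" and returns the redundant-insertion count
	size_t redundancy = gen.generate("target.bf", "subtract.bf");
	cerr << "Redundant k-mers: " << redundancy << endl;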
/*
 * Generates a Bloom filter and writes it to the given filename.
 * Returns the redundancy count (m_redundancy) of the Bloom filter generated from the files.
 * Currently only supports fasta files for the initial seeding.
 *
 * Uses fastq files to stream in additional sequence.
 * Stops after m_expectedEntries entries have been added.
 *
 * Outputs to the fileName path.
 */
size_t BloomFilterGenerator::generateProgressive(const string &filename,
		double score, const string &file1, const string &file2, createMode mode,
		const string &subtractFilter )
{

	//the number of hash functions used must be greater than 0
	assert(m_hashNum > 0);

	//the filter size must be greater than the number of expected entries
	assert(m_filterSize > m_expectedEntries);

	//setup bloom filter
	BloomFilter filter(m_filterSize, m_hashNum, m_kmerSize);

	//load the subtractive filter's companion info file: assuming the usual
	//naming convention, drop the two-character extension and append "txt"
	string infoFileName = (subtractFilter).substr(0,
			(subtractFilter).length() - 2) + "txt";
	BloomFilterInfo subInfo(infoFileName);

	//load the subtractive Bloom filter
	BloomFilter filterSub(subInfo.getCalcuatedFilterSize(),
			subInfo.getHashNum(), subInfo.getKmerSize(), subtractFilter);

	if (subInfo.getKmerSize() != m_kmerSize) {
		cerr
				<< "Error: Subtraction filter's k-mer size differs from the current filter's k-mer size."
				<< endl;
		exit(1);
	}

	//for each file loop over all headers and obtain seq
	//load input file + make filter
	for (boost::unordered_map<string, vector<string> >::iterator i =
			m_fileNamesAndHeaders.begin(); i != m_fileNamesAndHeaders.end(); ++i) {
		//let user know that files are being read
		cerr << "Processing File: " << i->first << endl;
		WindowedFileParser parser(i->first, m_kmerSize);
		for (vector<string>::iterator j = i->second.begin();
				j != i->second.end(); ++j) {
			parser.setLocationByHeader(*j);
			//object to process reads
			//insert elements into filter
			//read fasta file line by line and split using sliding window
			while (parser.notEndOfSeqeunce()) {
				const unsigned char* currentSeq = parser.getNextSeq();
				checkAndInsertKmer(currentSeq, filter);
			}
		}
	}

	size_t totalReads = 0;

	FastaReader sequence1(file1.c_str(), FastaReader::NO_FOLD_CASE);
	FastaReader sequence2(file2.c_str(), FastaReader::NO_FOLD_CASE);
#pragma omp parallel
	for (FastqRecord rec1;;) {
		FastqRecord rec2;
		bool good1;
		bool good2;

		if (m_totalEntries >= m_expectedEntries) {
			//so the threshold message is only printed once
			if (sequence1.eof() || sequence2.eof()) {
				break;
			}
#pragma omp critical(breakClose)
			{
				sequence1.breakClose();
				sequence2.breakClose();
				cerr << "K-mer threshold reached at read " << totalReads << endl;
			}
		}

#pragma omp critical(sequence)
		{
			good1 = sequence1 >> rec1;
			good2 = sequence2 >> rec2;
		}

		if (good1 && good2) {
#pragma omp critical(totalReads)
			{
				++totalReads;
				if (totalReads % 10000000 == 0) {
					cerr << "Currently Reading Read Number: " << totalReads
							<< endl;
				}
			}
			ReadsProcessor proc(m_kmerSize);
			string tempStr1 = rec1.id.substr(0, rec1.id.find_last_of("/"));
			string tempStr2 = rec2.id.substr(0, rec2.id.find_last_of("/"));
			if (tempStr1 == tempStr2) {
				unsigned size1 = rec1.seq.length() - m_kmerSize + 1;
				unsigned size2 = rec2.seq.length() - m_kmerSize + 1;
				vector<vector<size_t> > hashValues1(size1);
				vector<vector<size_t> > hashValues2(size2);
				switch (mode) {
				case PROG_INC: {
					if (SeqEval::evalSingle(rec1, m_kmerSize, filter,
									score * double(size1),
									(1.0 - score) * double(size1), m_hashNum,
									hashValues1, filterSub)) {
						//load remaining sequences
						for (unsigned i = 0; i < size1; ++i) {
							if (hashValues1[i].empty()) {
								const unsigned char* currentSeq = proc.prepSeq(
										rec1.seq, i);
								checkAndInsertKmer(currentSeq, filter);
							} else {
								insertKmer(hashValues1[i], filter);
							}
						}
						//load and store the second read
						for (unsigned i = 0; i < size2; ++i) {
							const unsigned char* currentSeq = proc.prepSeq(
									rec2.seq, i);
							checkAndInsertKmer(currentSeq, filter);
						}
					} else if (SeqEval::evalSingle(rec2, m_kmerSize, filter,
									score * size2, (1.0 - score) * size2, m_hashNum,
									hashValues2, filterSub)) {
						//load remaining sequences
						for (unsigned i = 0; i < size1; ++i) {
							if (hashValues1[i].empty()) {
								const unsigned char* currentSeq = proc.prepSeq(
										rec1.seq, i);
								checkAndInsertKmer(currentSeq, filter);
							} else {
								insertKmer(hashValues1[i], filter);
							}
						}
						//load and store the second read
						for (unsigned i = 0; i < size2; ++i) {
							if (hashValues2[i].empty()) {
								const unsigned char* currentSeq = proc.prepSeq(
										rec2.seq, i);
								checkAndInsertKmer(currentSeq, filter);
							} else {
								insertKmer(hashValues2[i], filter);
							}
						}
					}
					break;
				}
				case PROG_STD: {
					if (SeqEval::evalSingle(rec1, m_kmerSize, filter,
							score * double(size1),
							(1.0 - score) * double(size1), m_hashNum,
							hashValues1, filterSub)
							&& SeqEval::evalSingle(rec2, m_kmerSize, filter,
									score * size2, (1.0 - score) * size2,
									m_hashNum, hashValues2, filterSub)) {
						//load remaining sequences
						for (unsigned i = 0; i < size1; ++i) {
							if (hashValues1[i].empty()) {
								const unsigned char* currentSeq = proc.prepSeq(
										rec1.seq, i);
								checkAndInsertKmer(currentSeq, filter);
							} else {
								insertKmer(hashValues1[i], filter);
							}
						}
						//load and store the second read
						for (unsigned i = 0; i < size2; ++i) {
							if (hashValues2[i].empty()) {
								const unsigned char* currentSeq = proc.prepSeq(
										rec2.seq, i);
								checkAndInsertKmer(currentSeq, filter);
							} else {
								insertKmer(hashValues2[i], filter);
							}
						}
					}
					break;
				}
				}
			} else {
				cerr << "Read IDs do not match" << "\n" << tempStr1 << "\n"
						<< tempStr2 << endl;
				exit(1);
			}
		} else
			break;
	}
	if (!sequence1.eof() || !sequence2.eof()) {
		cerr
				<< "Error: eof bit not flipped; input files may be of different lengths."
				<< endl;
	}

	filter.storeFilter(filename);
	return m_redundancy;
}
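And a hypothetical call to the progressive variant, with illustrative arguments. Note the mode semantics visible in the switch above: PROG_INC adds a read pair when either mate scores above the threshold, while PROG_STD requires both mates to score:

	BloomFilterGenerator gen(/* ... */);
	//seed from the fasta headers, then stream paired fastq reads until
	//m_expectedEntries is reached; 0.15 is an illustrative score threshold
	size_t redundancy = gen.generateProgressive("target.bf", 0.15,
			"reads_1.fq", "reads_2.fq", PROG_INC, "subtract.bf");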