コード例 #1
0
ファイル: NGSD.cpp プロジェクト: mdozmorov/ngs-bits
void NGSD::setReportVariants(const QString& filename, const VariantList& variants, QSet<int> selected_indices)
{
	QString ps_id = processedSampleId(filename);

	//get variant ID
	for(int i=0; i<variants.count(); ++i)
	{
		getQuery().exec("UPDATE detected_variant SET report=" + QString(selected_indices.contains(i) ? "1" : "0" ) + " WHERE processed_sample_id='" + ps_id + "' AND variant_id='" + variantId(variants[i]) + "'");
	}
}
コード例 #2
0
ファイル: NGSD.cpp プロジェクト: mdozmorov/ngs-bits
int NGSD::addColumn(VariantList& variants, QString name, QString description)
{
	variants.annotations().append(VariantAnnotationHeader(name));
	for (int i=0; i<variants.count(); ++i)
	{
		variants[i].annotations().append("");
	}

	variants.annotationDescriptions().append(VariantAnnotationDescription(name, description));

	return variants.annotations().count() - 1;
}
コード例 #3
0
ファイル: NGSD.cpp プロジェクト: mdozmorov/ngs-bits
void NGSD::annotateSomatic(VariantList& variants, QString filename)
{
	//get sample ids
	QStringList samples = filename.split('-');
	QString ts_id = sampleId(samples[0], false);

	//check if we could determine the sample
	if (ts_id=="")
	{
		Log::warn("Could not find processed sample in NGSD from name '" + QFileInfo(filename).baseName() + "'. Annotation will be incomplete because processing system could not be determined!");
	}

	//remove all NGSD-specific columns
	QList<VariantAnnotationHeader> headers = variants.annotations();
	foreach(const VariantAnnotationHeader& header, headers)
	{
		if (header.name().startsWith("som_ihdb"))
		{
			removeColumnIfPresent(variants, header.name(), true);
		}
	}

	//get required column indices
	int som_ihdb_c_idx = addColumn(variants, "som_ihdb_c", "Somatic variant count within NGSD.");
	int som_ihdb_p_idx = addColumn(variants, "som_ihdb_p", "Projects with somatic variant in NGSD.");
	//(re-)annotate the variants
	for (int i=0; i<variants.count(); ++i)
	{
		Variant& v = variants[i];

		SqlQuery query = getQuery();
		query.exec("SELECT s.id, dsv.processed_sample_id_tumor, p.name FROM detected_somatic_variant as dsv, variant as v, processed_sample ps, sample as s, project as p WHERE ps.project_id=p.id AND dsv.processed_sample_id_tumor=ps.id and dsv.variant_id=v.id AND  ps.sample_id=s.id  AND s.tumor='1' AND v.chr='"+v.chr().str()+"' AND v.start='"+QString::number(v.start())+"' AND v.end='"+QString::number(v.end())+"' AND v.ref='"+v.ref()+"' AND v.obs='"+v.obs()+"'");

		//process variants
		QMap<QByteArray, int> project_map;
		QSet<QByteArray> processed_ps_ids;
		QSet<QByteArray> processed_s_ids;
		while(query.next())
		{
			QByteArray current_sample = query.value(0).toByteArray();
			QByteArray current_ps_id = query.value(1).toByteArray();
			QByteArray current_project = query.value(2).toByteArray();

			//skip already seen processed samples (there could be several variants because of indel window, but we want to process only one)
			if (processed_ps_ids.contains(current_ps_id)) continue;
			processed_ps_ids.insert(current_ps_id);

			//skip the current sample for general statistics
			if (current_sample==ts_id) continue;

			//skip already seen samples for general statistics (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues)
			if (processed_s_ids.contains(current_sample)) continue;
			processed_s_ids.insert(current_sample);

			// count
			if(!project_map.contains(current_project))	project_map.insert(current_project,0);
			++project_map[current_project];
		}

		QByteArray somatic_projects;
		int somatic_count = 0;
		QMap<QByteArray, int>::const_iterator j = project_map.constBegin();
		while(j!=project_map.constEnd())
		{
			somatic_count += j.value();
			somatic_projects += j.key() + ",";
			++j;
		}
		v.annotations()[som_ihdb_c_idx] = QByteArray::number(somatic_count);
		v.annotations()[som_ihdb_p_idx] = somatic_projects;
	}
}
コード例 #4
0
ファイル: NGSD.cpp プロジェクト: mdozmorov/ngs-bits
void NGSD::annotate(VariantList& variants, QString filename)
{
	initProgress("NGSD annotation", true);

	//get sample ids
	QString s_id = sampleId(filename, false);
	QString ps_id = processedSampleId(filename, false);
	QString sys_id = getValue("SELECT processing_system_id FROM processed_sample WHERE id='" + processedSampleId(filename, false) + "'").toString();

	//check if we could determine the sample
	bool found_in_db = true;
	if (s_id=="" || ps_id=="" || sys_id=="")
	{
		Log::warn("Could not find processed sample in NGSD by name '" + filename + "'. Annotation will be incomplete because processing system could not be determined!");
		found_in_db = false;
	}

	//get sample ids that have processed samples with the same processing system (not same sample, variants imported, same processing system, good quality of sample, not tumor)
	QSet<int> sys_sample_ids;
	SqlQuery tmp = getQuery();
	tmp.exec("SELECT DISTINCT s.id FROM processed_sample as ps, sample s WHERE ps.processing_system_id='" + sys_id + "' AND ps.sample_id=s.id AND s.tumor='0' AND s.quality='good' AND s.id!='" + s_id + "' AND (SELECT count(id) FROM detected_variant as dv WHERE dv.processed_sample_id = ps.id)>0");
	while(tmp.next())
	{
		sys_sample_ids.insert(tmp.value(0).toInt());
	}

	//remove all NGSD-specific columns
	QList<VariantAnnotationHeader> headers = variants.annotations();
	foreach(const VariantAnnotationHeader& header, headers)
	{
		if (header.name().startsWith("ihdb_"))
		{
			removeColumnIfPresent(variants, header.name(), true);
		}
	}
	removeColumnIfPresent(variants, "classification", true);
	removeColumnIfPresent(variants, "classification_comment", true);
	removeColumnIfPresent(variants, "validated", true);
	removeColumnIfPresent(variants, "comment", true);

	//get required column indices
	QString num_samples = QString::number(sys_sample_ids.count());
	int ihdb_hom_idx = addColumn(variants, "ihdb_hom", "Homozygous variant counts in NGSD for the same processing system (" + num_samples + " samples).");
	int ihdb_het_idx = addColumn(variants, "ihdb_het", "Heterozyous variant counts in NGSD for the same processing system (" + num_samples + " samples).");
	int ihdb_wt_idx  = addColumn(variants, "ihdb_wt", "Wildtype variant counts in NGSD for the same processing system (" + num_samples + " samples).");
	int ihdb_all_hom_idx = addColumn(variants, "ihdb_allsys_hom", "Homozygous variant counts in NGSD independent of the processing system.");
	int ihdb_all_het_idx =  addColumn(variants, "ihdb_allsys_het", "Heterozygous variant counts in NGSD independent of the processing system.");
	int class_idx = addColumn(variants, "classification", "Classification from the NGSD.");
	int clacom_idx = addColumn(variants, "classification_comment", "Classification comment from the NGSD.");
	int valid_idx = addColumn(variants, "validated", "Validation information from the NGSD. Validation results of other samples are listed in brackets!");
	if (variants.annotationIndexByName("comment", true, false)==-1) addColumn(variants, "comment", "Comments from the NGSD. Comments of other samples are listed in brackets!");
	int comment_idx = variants.annotationIndexByName("comment", true, false);

	//(re-)annotate the variants
	SqlQuery query = getQuery();
	for (int i=0; i<variants.count(); ++i)
	{
		//QTime timer;
		//timer.start();

		//variant id
		Variant& v = variants[i];
		QByteArray v_id = variantId(v, false).toLatin1();

		//variant classification
		QVariant classification = getValue("SELECT class FROM variant_classification WHERE variant_id='" + v_id + "'", true);
		if (!classification.isNull())
		{
			v.annotations()[class_idx] = classification.toByteArray().replace("n/a", "");
			v.annotations()[clacom_idx] = getValue("SELECT comment FROM variant_classification WHERE variant_id='" + v_id + "'", true).toByteArray().replace("\n", " ").replace("\t", " ");
		}
		//int t_v = timer.elapsed();
		//timer.restart();

		//detected variant infos
		int dv_id = -1;
		QByteArray comment = "";
		if (found_in_db)
		{
			query.exec("SELECT id, comment FROM detected_variant WHERE processed_sample_id='" + ps_id + "' AND variant_id='" + v_id + "'");
			if (query.size()==1)
			{
				query.next();
				dv_id = query.value(0).toInt();
				comment = query.value(1).toByteArray();
			}
		}

		//validation info
		int vv_id = -1;
		QByteArray val_status = "";
		if (found_in_db)
		{
			query.exec("SELECT id, status FROM variant_validation WHERE sample_id='" + s_id + "' AND variant_id='" + v_id + "'");
			if (query.size()==1)
			{
				query.next();
				vv_id = query.value(0).toInt();
				val_status = query.value(1).toByteArray().replace("n/a", "");
			}
		}

		//int t_dv = timer.elapsed();
		//timer.restart();

		//validation info other samples
		int tps = 0;
		int fps = 0;
		query.exec("SELECT id, status FROM variant_validation WHERE variant_id='"+v_id+"' AND status!='n/a'");
		while(query.next())
		{
			if (query.value(0).toInt()==vv_id) continue;
			if (query.value(1).toByteArray()=="true positive") ++tps;
			else if (query.value(1).toByteArray()=="false positive") ++fps;
		}
		if (tps>0 || fps>0)
		{
			if (val_status=="") val_status = "n/a";
			val_status += " (" + QByteArray::number(tps) + "xTP, " + QByteArray::number(fps) + "xFP)";
		}
		//int t_val = timer.elapsed();
		//timer.restart();

		//comments other samples
		QList<QByteArray> comments;
		query.exec("SELECT id, comment FROM detected_variant WHERE variant_id='"+v_id+"' AND comment IS NOT NULL");
		while(query.next())
		{
			if (query.value(0).toInt()==dv_id) continue;
			QByteArray tmp = query.value(1).toByteArray().trimmed();
			if (tmp!="") comments.append(tmp);
		}
		if (comments.size()>0)
		{
			if (comment=="") comment = "n/a";
			comment += " (";
			for (int i=0; i<comments.count(); ++i)
			{
				if (i>0)
				{
					comment += ", ";
				}
				comment += comments[i];
			}
			comment += ")";
		}
		//int t_com = timer.elapsed();
		//timer.restart();

		//genotype counts
		int allsys_hom_count = 0;
		int allsys_het_count = 0;
		int sys_hom_count = 0;
		int sys_het_count = 0;
		QSet<int> s_ids_done;
		int s_id_int = s_id.toInt();
		query.exec("SELECT dv.genotype, ps.sample_id FROM detected_variant as dv, processed_sample ps WHERE dv.processed_sample_id=ps.id AND dv.variant_id='" + v_id + "'");
		while(query.next())
		{
			//skip this sample id
			int current_sample = query.value(1).toInt();
			if (current_sample==s_id_int) continue;

			//skip already seen samples (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues)
			if (s_ids_done.contains(current_sample)) continue;
			s_ids_done.insert(current_sample);

			QByteArray current_geno = query.value(0).toByteArray();
			if (current_geno=="hom")
			{
				++allsys_hom_count;
				if (sys_sample_ids.contains(current_sample))
				{
					++sys_hom_count;
				}
			}
			else if (current_geno=="het")
			{
				++allsys_het_count;
				if (sys_sample_ids.contains(current_sample))
				{
					++sys_het_count;
				}
			}
		}
		//qDebug() << (v.isSNV() ? "S" : "I") << query.size() << t_v << t_dv << t_val << t_com << timer.elapsed();

		v.annotations()[ihdb_all_hom_idx] = QByteArray::number(allsys_hom_count);
		v.annotations()[ihdb_all_het_idx] = QByteArray::number(allsys_het_count);
		if (found_in_db)
		{
			v.annotations()[ihdb_hom_idx] = QByteArray::number((double)sys_hom_count / sys_sample_ids.count(), 'f', 4);
			v.annotations()[ihdb_het_idx] =  QByteArray::number((double)sys_het_count / sys_sample_ids.count(), 'f', 4);
			v.annotations()[ihdb_wt_idx] =  QByteArray::number((double)(sys_sample_ids.count() - sys_hom_count - sys_het_count) / sys_sample_ids.count(), 'f', 4);
			v.annotations()[valid_idx] = val_status;
			v.annotations()[comment_idx] = comment.replace("\n", " ").replace("\t", " ");
		}
		else
		{
			v.annotations()[ihdb_hom_idx] = "n/a";
			v.annotations()[ihdb_het_idx] = "n/a";
			v.annotations()[ihdb_wt_idx] = "n/a";
			v.annotations()[valid_idx] = "n/a";
			v.annotations()[comment_idx] = "n/a";
		}

		emit updateProgress(100*i/variants.count());
	}
}