int NGSD::geneToApprovedID(const QByteArray& gene) { //init static SqlQuery q_gene = getQuery(true); static SqlQuery q_prev = getQuery(true); static SqlQuery q_syn = getQuery(true); static bool init = false; if (!init) { q_gene.prepare("SELECT id FROM gene WHERE symbol=:1"); q_prev.prepare("SELECT g.id FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='previous'"); q_syn.prepare("SELECT g.id FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='synonym'"); init = true; } //approved q_gene.bindValue(0, gene); q_gene.exec(); if (q_gene.size()==1) { q_gene.next(); return q_gene.value(0).toInt(); } //previous q_prev.bindValue(0, gene); q_prev.exec(); if (q_prev.size()==1) { q_prev.next(); return q_prev.value(0).toInt(); } else if(q_prev.size()>1) { return -1; } //synonymous q_syn.bindValue(0, gene); q_syn.exec(); if (q_syn.size()==1) { q_syn.next(); return q_syn.value(0).toInt(); } return -1; }
QStringList NGSD::getEnum(QString table, QString column) { //check cache static QMap<QString, QStringList> cache; QString hash = table+"."+column; if (cache.contains(hash)) { return cache.value(hash); } //DB query SqlQuery q = getQuery(); q.exec("DESCRIBE "+table+" "+column); while (q.next()) { QString type = q.value(1).toString(); type.replace("'", ""); type.replace("enum(", ""); type.replace(")", ""); cache[hash] = type.split(","); return cache[hash]; } THROW(ProgrammingException, "Could not determine enum values of column '"+column+"' in table '"+table+"'!"); }
bool NGSD::tableEmpty(QString table) { SqlQuery query = getQuery(); query.exec("SELECT COUNT(*) FROM " + table); query.next(); return query.value(0).toInt()==0; }
void NGSD::init(QString password) { //remove existing tables SqlQuery query = getQuery(); query.exec("SHOW TABLES"); if (query.size()>0) { //check password for re-init of production DB if (!test_db_ && password!=Settings::string("ngsd_pass")) { THROW(DatabaseException, "Password provided for re-initialization of procution database is incorrect!"); } //get table list QStringList tables; while(query.next()) { tables << query.value(0).toString(); } //remove old tables if (!tables.isEmpty()) { query.exec("SET FOREIGN_KEY_CHECKS = 0;"); query.exec("DROP TABLE " + tables.join(",")); query.exec("SET FOREIGN_KEY_CHECKS = 1;"); } } //initilize executeQueriesFromFile(":/resources/NGSD_schema.sql"); }
QStringList NGSD::getValues(const QString& query) { SqlQuery q = getQuery(); q.exec(query); QStringList output; output.reserve(q.size()); while(q.next()) { output.append(q.value(0).toString()); } return output; }
ValidationInfo NGSD::getValidationStatus(const QString& filename, const Variant& variant) { SqlQuery query = getQuery(); query.exec("SELECT status, type, comment FROM variant_validation WHERE sample_id='" + sampleId(filename) + "' AND variant_id='" + variantId(variant) + "'"); if (query.size()==0) { return ValidationInfo(); } else { query.next(); return ValidationInfo{ query.value(0).toString().trimmed(), query.value(1).toString().trimmed(), query.value(2).toString().trimmed() }; } }
QPair<QString, QString> NGSD::getClassification(const Variant& variant) { SqlQuery query = getQuery(); query.exec("SELECT class, comment FROM variant_classification WHERE variant_id='" + variantId(variant) + "'"); if (query.size()==0) { return QPair<QString, QString>("n/a", ""); } else { query.next(); return QPair<QString, QString>(query.value(0).toString().trimmed(), query.value(1).toString().trimmed()); } }
void ListModel::_loadItems(ListItem* parent) { int parentId = parent->id(); SqlQuery sql; sql.prepare("SELECT id, weight, content, is_expanded, is_project, is_milestone, is_highlighted, is_checkable, is_completed, is_cancelled, due_date, priority " "FROM list_item WHERE list_id = :list AND parent_id = :parent " "ORDER BY weight ASC"); sql.bindValue(":list", _listId); sql.bindValue(":parent", parentId); if (!sql.exec()) return; int currRow = 0; while (sql.next()) { int c = -1; int id = sql.value(++c).toInt(); int row = sql.value(++c).toInt(); QString content = sql.value(++c).toString(); bool isExpanded = sql.value(++c).toBool(); bool isProject = sql.value(++c).toBool(); bool isMilestone = sql.value(++c).toBool(); bool isHighlighted = sql.value(++c).toBool(); bool isCheckable = sql.value(++c).toBool(); bool isCompleted = sql.value(++c).toBool(); bool isCancelled = sql.value(++c).toBool(); QDate dueDate = sql.value(++c).toDate(); int priority = sql.value(++c).toInt(); // Fix gap between row in database if (row != currRow) { SqlQuery sql; sql.prepare("UPDATE list_item SET weight = :weight WHERE id = :id"); sql.bindValue(":weight", currRow); sql.bindValue(":id", id); sql.exec(); } ListItem* item = new ListItem(_listId, id, content, isExpanded, isProject, isMilestone, isHighlighted, isCheckable, isCompleted, isCancelled, dueDate, priority); parent->appendChild(item); _loadItems(item); ++currRow; } }
QString NGSD::variantId(const Variant& variant, bool throw_if_fails) { SqlQuery query = getQuery(); //use binding user input (safety) query.prepare("SELECT id FROM variant WHERE chr=:chr AND start='"+QString::number(variant.start())+"' AND end='"+QString::number(variant.end())+"' AND ref=:ref AND obs=:obs"); query.bindValue(":chr", variant.chr().str()); query.bindValue(":ref", variant.ref()); query.bindValue(":obs", variant.obs()); query.exec(); if (query.size()==0) { if (throw_if_fails) { THROW(DatabaseException, "Variant " + variant.toString() + " not found in NGSD!"); } else { return "-1"; } } query.next(); return query.value(0).toString(); }
QCCollection NGSD::getQCData(const QString& filename) { QString ps_id = processedSampleId(filename, false); //get QC data SqlQuery q = getQuery(); q.exec("SELECT n.name, nm.value, n.description, n.qcml_id FROM processed_sample_qc as nm, qc_terms as n WHERE nm.processed_sample_id='" + ps_id + "' AND nm.qc_terms_id=n.id"); QCCollection output; while(q.next()) { output.insert(QCValue(q.value(0).toString(), q.value(1).toString(), q.value(2).toString(), q.value(3).toString())); } //get KASP data SqlQuery q2 = getQuery(); q2.exec("SELECT random_error_prob FROM kasp_status WHERE processed_sample_id='" + ps_id + "'"); QString value = "n/a"; if (q2.size()>0) { q2.next(); float numeric_value = 100.0 * q2.value(0).toFloat(); if (numeric_value>100.0) //special case: random_error_prob>100% { value = "<font color=orange>KASP not performed (see NGSD)</font>"; } else if (numeric_value>1.0) //random_error_prob>1% => warn { value = "<font color=red>"+QString::number(numeric_value)+"%</font>"; } else { value = QString::number(numeric_value)+"%"; } } output.insert(QCValue("kasp", value)); return output; }
QVector<double> NGSD::getQCValues(const QString& accession, const QString& filename) { //get processing system ID QString sys_id = getValue("SELECT processing_system_id FROM processed_sample WHERE id='" + processedSampleId(filename) + "'").toString(); //get QC id QString qc_id = getValue("SELECT id FROM qc_terms WHERE qcml_id='" + accession + "'").toString(); //get QC data SqlQuery q = getQuery(); q.exec("SELECT nm.value FROM processed_sample_qc as nm, processed_sample as ps WHERE ps.processing_system_id='" + sys_id + "' AND nm.qc_terms_id='" + qc_id + "' AND nm.processed_sample_id=ps.id "); //fill output datastructure QVector<double> output; while(q.next()) { bool ok = false; double value = q.value(0).toDouble(&ok); if (ok) output.append(value); } return output; }
QString NGSD::sampleId(const QString& filename, bool throw_if_fails) { QStringList parts = QFileInfo(filename).baseName().append('_').split('_'); //get sample ID SqlQuery query = getQuery(); //use binding (user input) query.prepare("SELECT id FROM sample WHERE name=:sample"); query.bindValue(":sample", parts[0]); query.exec(); if (query.size()==0) { if(throw_if_fails) { THROW(DatabaseException, "Sample name '" + parts[0] + "' not found in NGSD!"); } else { return ""; } } query.next(); return query.value(0).toString(); }
QVariant NGSD::getValue(const QString& query, bool no_value_is_ok) { SqlQuery q = getQuery(); q.exec(query); if (q.size()==0) { if (no_value_is_ok) { return QVariant(); } else { THROW(DatabaseException, "NGSD single value query returned no value: " + query); } } if (q.size()>1) { THROW(DatabaseException, "NGSD single value query returned several values: " + query); } q.next(); return q.value(0); }
QString NGSD::processedSampleId(const QString& filename, bool throw_if_fails) { QStringList parts = QFileInfo(filename).baseName().append('_').split('_'); //get sample ID SqlQuery query = getQuery(); //use binding (user input) query.prepare("SELECT ps.id FROM processed_sample ps, sample s WHERE s.name=:sample AND ps.sample_id=s.id AND ps.process_id=:psnum"); query.bindValue(":sample", parts[0]); query.bindValue(":psnum", QString::number(parts[1].toInt())); query.exec(); if (query.size()==0) { if(throw_if_fails) { THROW(DatabaseException, "Processed sample name '" + parts[0] + "_" + parts[1] + "' not found in NGSD!"); } else { return ""; } } query.next(); return query.value(0).toString(); }
QPair<QByteArray, QByteArray> NGSD::geneToApproved(const QByteArray& gene) { //init static SqlQuery q_gene = getQuery(true); static SqlQuery q_prev = getQuery(true); static SqlQuery q_syn = getQuery(true); static bool init = false; if (!init) { q_gene.prepare("SELECT id FROM gene WHERE symbol=:1"); q_prev.prepare("SELECT g.symbol FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='previous'"); q_syn.prepare("SELECT g.symbol FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='synonym'"); init = true; } //approved q_gene.bindValue(0, gene); q_gene.exec(); if (q_gene.size()==1) { q_gene.next(); return qMakePair(gene, QByteArray("KEPT: is approved symbol")); } //previous q_prev.bindValue(0, gene); q_prev.exec(); if (q_prev.size()==1) { q_prev.next(); return qMakePair(q_prev.value(0).toByteArray(), "REPLACED: " + gene + " is a previous symbol"); } else if(q_prev.size()>1) { QByteArray genes; while(q_prev.next()) { if (!genes.isEmpty()) genes.append(", "); genes.append(q_prev.value(0).toByteArray()); } return qMakePair(gene, "ERROR: is a previous symbol of the genes " + genes); } //synonymous q_syn.bindValue(0, gene); q_syn.exec(); if (q_syn.size()==1) { q_syn.next(); return qMakePair(q_syn.value(0).toByteArray(), "REPLACED: " + gene + " is a synonymous symbol"); } else if(q_syn.size()>1) { QByteArray genes; while(q_syn.next()) { if (!genes.isEmpty()) genes.append(", "); genes.append(q_syn.value(0).toByteArray()); } return qMakePair(gene, "ERROR: is a synonymous symbol of the genes " + genes); } return qMakePair(gene, QByteArray("ERROR: is unknown symbol")); }
void NGSD::annotate(VariantList& variants, QString filename) { initProgress("NGSD annotation", true); //get sample ids QString s_id = sampleId(filename, false); QString ps_id = processedSampleId(filename, false); QString sys_id = getValue("SELECT processing_system_id FROM processed_sample WHERE id='" + processedSampleId(filename, false) + "'").toString(); //check if we could determine the sample bool found_in_db = true; if (s_id=="" || ps_id=="" || sys_id=="") { Log::warn("Could not find processed sample in NGSD by name '" + filename + "'. Annotation will be incomplete because processing system could not be determined!"); found_in_db = false; } //get sample ids that have processed samples with the same processing system (not same sample, variants imported, same processing system, good quality of sample, not tumor) QSet<int> sys_sample_ids; SqlQuery tmp = getQuery(); tmp.exec("SELECT DISTINCT s.id FROM processed_sample as ps, sample s WHERE ps.processing_system_id='" + sys_id + "' AND ps.sample_id=s.id AND s.tumor='0' AND s.quality='good' AND s.id!='" + s_id + "' AND (SELECT count(id) FROM detected_variant as dv WHERE dv.processed_sample_id = ps.id)>0"); while(tmp.next()) { sys_sample_ids.insert(tmp.value(0).toInt()); } //remove all NGSD-specific columns QList<VariantAnnotationHeader> headers = variants.annotations(); foreach(const VariantAnnotationHeader& header, headers) { if (header.name().startsWith("ihdb_")) { removeColumnIfPresent(variants, header.name(), true); } } removeColumnIfPresent(variants, "classification", true); removeColumnIfPresent(variants, "classification_comment", true); removeColumnIfPresent(variants, "validated", true); removeColumnIfPresent(variants, "comment", true); //get required column indices QString num_samples = QString::number(sys_sample_ids.count()); int ihdb_hom_idx = addColumn(variants, "ihdb_hom", "Homozygous variant counts in NGSD for the same processing system (" + num_samples + " samples)."); int ihdb_het_idx = addColumn(variants, "ihdb_het", "Heterozyous variant counts in NGSD for the same processing system (" + num_samples + " samples)."); int ihdb_wt_idx = addColumn(variants, "ihdb_wt", "Wildtype variant counts in NGSD for the same processing system (" + num_samples + " samples)."); int ihdb_all_hom_idx = addColumn(variants, "ihdb_allsys_hom", "Homozygous variant counts in NGSD independent of the processing system."); int ihdb_all_het_idx = addColumn(variants, "ihdb_allsys_het", "Heterozygous variant counts in NGSD independent of the processing system."); int class_idx = addColumn(variants, "classification", "Classification from the NGSD."); int clacom_idx = addColumn(variants, "classification_comment", "Classification comment from the NGSD."); int valid_idx = addColumn(variants, "validated", "Validation information from the NGSD. Validation results of other samples are listed in brackets!"); if (variants.annotationIndexByName("comment", true, false)==-1) addColumn(variants, "comment", "Comments from the NGSD. Comments of other samples are listed in brackets!"); int comment_idx = variants.annotationIndexByName("comment", true, false); //(re-)annotate the variants SqlQuery query = getQuery(); for (int i=0; i<variants.count(); ++i) { //QTime timer; //timer.start(); //variant id Variant& v = variants[i]; QByteArray v_id = variantId(v, false).toLatin1(); //variant classification QVariant classification = getValue("SELECT class FROM variant_classification WHERE variant_id='" + v_id + "'", true); if (!classification.isNull()) { v.annotations()[class_idx] = classification.toByteArray().replace("n/a", ""); v.annotations()[clacom_idx] = getValue("SELECT comment FROM variant_classification WHERE variant_id='" + v_id + "'", true).toByteArray().replace("\n", " ").replace("\t", " "); } //int t_v = timer.elapsed(); //timer.restart(); //detected variant infos int dv_id = -1; QByteArray comment = ""; if (found_in_db) { query.exec("SELECT id, comment FROM detected_variant WHERE processed_sample_id='" + ps_id + "' AND variant_id='" + v_id + "'"); if (query.size()==1) { query.next(); dv_id = query.value(0).toInt(); comment = query.value(1).toByteArray(); } } //validation info int vv_id = -1; QByteArray val_status = ""; if (found_in_db) { query.exec("SELECT id, status FROM variant_validation WHERE sample_id='" + s_id + "' AND variant_id='" + v_id + "'"); if (query.size()==1) { query.next(); vv_id = query.value(0).toInt(); val_status = query.value(1).toByteArray().replace("n/a", ""); } } //int t_dv = timer.elapsed(); //timer.restart(); //validation info other samples int tps = 0; int fps = 0; query.exec("SELECT id, status FROM variant_validation WHERE variant_id='"+v_id+"' AND status!='n/a'"); while(query.next()) { if (query.value(0).toInt()==vv_id) continue; if (query.value(1).toByteArray()=="true positive") ++tps; else if (query.value(1).toByteArray()=="false positive") ++fps; } if (tps>0 || fps>0) { if (val_status=="") val_status = "n/a"; val_status += " (" + QByteArray::number(tps) + "xTP, " + QByteArray::number(fps) + "xFP)"; } //int t_val = timer.elapsed(); //timer.restart(); //comments other samples QList<QByteArray> comments; query.exec("SELECT id, comment FROM detected_variant WHERE variant_id='"+v_id+"' AND comment IS NOT NULL"); while(query.next()) { if (query.value(0).toInt()==dv_id) continue; QByteArray tmp = query.value(1).toByteArray().trimmed(); if (tmp!="") comments.append(tmp); } if (comments.size()>0) { if (comment=="") comment = "n/a"; comment += " ("; for (int i=0; i<comments.count(); ++i) { if (i>0) { comment += ", "; } comment += comments[i]; } comment += ")"; } //int t_com = timer.elapsed(); //timer.restart(); //genotype counts int allsys_hom_count = 0; int allsys_het_count = 0; int sys_hom_count = 0; int sys_het_count = 0; QSet<int> s_ids_done; int s_id_int = s_id.toInt(); query.exec("SELECT dv.genotype, ps.sample_id FROM detected_variant as dv, processed_sample ps WHERE dv.processed_sample_id=ps.id AND dv.variant_id='" + v_id + "'"); while(query.next()) { //skip this sample id int current_sample = query.value(1).toInt(); if (current_sample==s_id_int) continue; //skip already seen samples (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues) if (s_ids_done.contains(current_sample)) continue; s_ids_done.insert(current_sample); QByteArray current_geno = query.value(0).toByteArray(); if (current_geno=="hom") { ++allsys_hom_count; if (sys_sample_ids.contains(current_sample)) { ++sys_hom_count; } } else if (current_geno=="het") { ++allsys_het_count; if (sys_sample_ids.contains(current_sample)) { ++sys_het_count; } } } //qDebug() << (v.isSNV() ? "S" : "I") << query.size() << t_v << t_dv << t_val << t_com << timer.elapsed(); v.annotations()[ihdb_all_hom_idx] = QByteArray::number(allsys_hom_count); v.annotations()[ihdb_all_het_idx] = QByteArray::number(allsys_het_count); if (found_in_db) { v.annotations()[ihdb_hom_idx] = QByteArray::number((double)sys_hom_count / sys_sample_ids.count(), 'f', 4); v.annotations()[ihdb_het_idx] = QByteArray::number((double)sys_het_count / sys_sample_ids.count(), 'f', 4); v.annotations()[ihdb_wt_idx] = QByteArray::number((double)(sys_sample_ids.count() - sys_hom_count - sys_het_count) / sys_sample_ids.count(), 'f', 4); v.annotations()[valid_idx] = val_status; v.annotations()[comment_idx] = comment.replace("\n", " ").replace("\t", " "); } else { v.annotations()[ihdb_hom_idx] = "n/a"; v.annotations()[ihdb_het_idx] = "n/a"; v.annotations()[ihdb_wt_idx] = "n/a"; v.annotations()[valid_idx] = "n/a"; v.annotations()[comment_idx] = "n/a"; } emit updateProgress(100*i/variants.count()); } }
void NGSD::annotateSomatic(VariantList& variants, QString filename) { //get sample ids QStringList samples = filename.split('-'); QString ts_id = sampleId(samples[0], false); //check if we could determine the sample if (ts_id=="") { Log::warn("Could not find processed sample in NGSD from name '" + QFileInfo(filename).baseName() + "'. Annotation will be incomplete because processing system could not be determined!"); } //remove all NGSD-specific columns QList<VariantAnnotationHeader> headers = variants.annotations(); foreach(const VariantAnnotationHeader& header, headers) { if (header.name().startsWith("som_ihdb")) { removeColumnIfPresent(variants, header.name(), true); } } //get required column indices int som_ihdb_c_idx = addColumn(variants, "som_ihdb_c", "Somatic variant count within NGSD."); int som_ihdb_p_idx = addColumn(variants, "som_ihdb_p", "Projects with somatic variant in NGSD."); //(re-)annotate the variants for (int i=0; i<variants.count(); ++i) { Variant& v = variants[i]; SqlQuery query = getQuery(); query.exec("SELECT s.id, dsv.processed_sample_id_tumor, p.name FROM detected_somatic_variant as dsv, variant as v, processed_sample ps, sample as s, project as p WHERE ps.project_id=p.id AND dsv.processed_sample_id_tumor=ps.id and dsv.variant_id=v.id AND ps.sample_id=s.id AND s.tumor='1' AND v.chr='"+v.chr().str()+"' AND v.start='"+QString::number(v.start())+"' AND v.end='"+QString::number(v.end())+"' AND v.ref='"+v.ref()+"' AND v.obs='"+v.obs()+"'"); //process variants QMap<QByteArray, int> project_map; QSet<QByteArray> processed_ps_ids; QSet<QByteArray> processed_s_ids; while(query.next()) { QByteArray current_sample = query.value(0).toByteArray(); QByteArray current_ps_id = query.value(1).toByteArray(); QByteArray current_project = query.value(2).toByteArray(); //skip already seen processed samples (there could be several variants because of indel window, but we want to process only one) if (processed_ps_ids.contains(current_ps_id)) continue; processed_ps_ids.insert(current_ps_id); //skip the current sample for general statistics if (current_sample==ts_id) continue; //skip already seen samples for general statistics (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues) if (processed_s_ids.contains(current_sample)) continue; processed_s_ids.insert(current_sample); // count if(!project_map.contains(current_project)) project_map.insert(current_project,0); ++project_map[current_project]; } QByteArray somatic_projects; int somatic_count = 0; QMap<QByteArray, int>::const_iterator j = project_map.constBegin(); while(j!=project_map.constEnd()) { somatic_count += j.value(); somatic_projects += j.key() + ","; ++j; } v.annotations()[som_ihdb_c_idx] = QByteArray::number(somatic_count); v.annotations()[som_ihdb_p_idx] = somatic_projects; } }