int NGSD::geneToApprovedID(const QByteArray& gene) { //init static SqlQuery q_gene = getQuery(true); static SqlQuery q_prev = getQuery(true); static SqlQuery q_syn = getQuery(true); static bool init = false; if (!init) { q_gene.prepare("SELECT id FROM gene WHERE symbol=:1"); q_prev.prepare("SELECT g.id FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='previous'"); q_syn.prepare("SELECT g.id FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='synonym'"); init = true; } //approved q_gene.bindValue(0, gene); q_gene.exec(); if (q_gene.size()==1) { q_gene.next(); return q_gene.value(0).toInt(); } //previous q_prev.bindValue(0, gene); q_prev.exec(); if (q_prev.size()==1) { q_prev.next(); return q_prev.value(0).toInt(); } else if(q_prev.size()>1) { return -1; } //synonymous q_syn.bindValue(0, gene); q_syn.exec(); if (q_syn.size()==1) { q_syn.next(); return q_syn.value(0).toInt(); } return -1; }
void NGSD::init(QString password) { //remove existing tables SqlQuery query = getQuery(); query.exec("SHOW TABLES"); if (query.size()>0) { //check password for re-init of production DB if (!test_db_ && password!=Settings::string("ngsd_pass")) { THROW(DatabaseException, "Password provided for re-initialization of procution database is incorrect!"); } //get table list QStringList tables; while(query.next()) { tables << query.value(0).toString(); } //remove old tables if (!tables.isEmpty()) { query.exec("SET FOREIGN_KEY_CHECKS = 0;"); query.exec("DROP TABLE " + tables.join(",")); query.exec("SET FOREIGN_KEY_CHECKS = 1;"); } } //initilize executeQueriesFromFile(":/resources/NGSD_schema.sql"); }
void NGSD::tableExists(QString table) { SqlQuery query = getQuery(); query.exec("SHOW TABLES LIKE '" + table + "'"); if (query.size()==0) { THROW(DatabaseException, "Table '" + table + "' does not exist!") } }
QStringList NGSD::getValues(const QString& query) { SqlQuery q = getQuery(); q.exec(query); QStringList output; output.reserve(q.size()); while(q.next()) { output.append(q.value(0).toString()); } return output; }
QPair<QString, QString> NGSD::getClassification(const Variant& variant) { SqlQuery query = getQuery(); query.exec("SELECT class, comment FROM variant_classification WHERE variant_id='" + variantId(variant) + "'"); if (query.size()==0) { return QPair<QString, QString>("n/a", ""); } else { query.next(); return QPair<QString, QString>(query.value(0).toString().trimmed(), query.value(1).toString().trimmed()); } }
ValidationInfo NGSD::getValidationStatus(const QString& filename, const Variant& variant) { SqlQuery query = getQuery(); query.exec("SELECT status, type, comment FROM variant_validation WHERE sample_id='" + sampleId(filename) + "' AND variant_id='" + variantId(variant) + "'"); if (query.size()==0) { return ValidationInfo(); } else { query.next(); return ValidationInfo{ query.value(0).toString().trimmed(), query.value(1).toString().trimmed(), query.value(2).toString().trimmed() }; } }
QVariant NGSD::getValue(const QString& query, bool no_value_is_ok) { SqlQuery q = getQuery(); q.exec(query); if (q.size()==0) { if (no_value_is_ok) { return QVariant(); } else { THROW(DatabaseException, "NGSD single value query returned no value: " + query); } } if (q.size()>1) { THROW(DatabaseException, "NGSD single value query returned several values: " + query); } q.next(); return q.value(0); }
QString NGSD::variantId(const Variant& variant, bool throw_if_fails) { SqlQuery query = getQuery(); //use binding user input (safety) query.prepare("SELECT id FROM variant WHERE chr=:chr AND start='"+QString::number(variant.start())+"' AND end='"+QString::number(variant.end())+"' AND ref=:ref AND obs=:obs"); query.bindValue(":chr", variant.chr().str()); query.bindValue(":ref", variant.ref()); query.bindValue(":obs", variant.obs()); query.exec(); if (query.size()==0) { if (throw_if_fails) { THROW(DatabaseException, "Variant " + variant.toString() + " not found in NGSD!"); } else { return "-1"; } } query.next(); return query.value(0).toString(); }
QString NGSD::sampleId(const QString& filename, bool throw_if_fails) { QStringList parts = QFileInfo(filename).baseName().append('_').split('_'); //get sample ID SqlQuery query = getQuery(); //use binding (user input) query.prepare("SELECT id FROM sample WHERE name=:sample"); query.bindValue(":sample", parts[0]); query.exec(); if (query.size()==0) { if(throw_if_fails) { THROW(DatabaseException, "Sample name '" + parts[0] + "' not found in NGSD!"); } else { return ""; } } query.next(); return query.value(0).toString(); }
QString NGSD::processedSampleId(const QString& filename, bool throw_if_fails) { QStringList parts = QFileInfo(filename).baseName().append('_').split('_'); //get sample ID SqlQuery query = getQuery(); //use binding (user input) query.prepare("SELECT ps.id FROM processed_sample ps, sample s WHERE s.name=:sample AND ps.sample_id=s.id AND ps.process_id=:psnum"); query.bindValue(":sample", parts[0]); query.bindValue(":psnum", QString::number(parts[1].toInt())); query.exec(); if (query.size()==0) { if(throw_if_fails) { THROW(DatabaseException, "Processed sample name '" + parts[0] + "_" + parts[1] + "' not found in NGSD!"); } else { return ""; } } query.next(); return query.value(0).toString(); }
QPair<QByteArray, QByteArray> NGSD::geneToApproved(const QByteArray& gene) { //init static SqlQuery q_gene = getQuery(true); static SqlQuery q_prev = getQuery(true); static SqlQuery q_syn = getQuery(true); static bool init = false; if (!init) { q_gene.prepare("SELECT id FROM gene WHERE symbol=:1"); q_prev.prepare("SELECT g.symbol FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='previous'"); q_syn.prepare("SELECT g.symbol FROM gene g, gene_alias ga WHERE g.id=ga.gene_id AND ga.symbol=:1 AND ga.type='synonym'"); init = true; } //approved q_gene.bindValue(0, gene); q_gene.exec(); if (q_gene.size()==1) { q_gene.next(); return qMakePair(gene, QByteArray("KEPT: is approved symbol")); } //previous q_prev.bindValue(0, gene); q_prev.exec(); if (q_prev.size()==1) { q_prev.next(); return qMakePair(q_prev.value(0).toByteArray(), "REPLACED: " + gene + " is a previous symbol"); } else if(q_prev.size()>1) { QByteArray genes; while(q_prev.next()) { if (!genes.isEmpty()) genes.append(", "); genes.append(q_prev.value(0).toByteArray()); } return qMakePair(gene, "ERROR: is a previous symbol of the genes " + genes); } //synonymous q_syn.bindValue(0, gene); q_syn.exec(); if (q_syn.size()==1) { q_syn.next(); return qMakePair(q_syn.value(0).toByteArray(), "REPLACED: " + gene + " is a synonymous symbol"); } else if(q_syn.size()>1) { QByteArray genes; while(q_syn.next()) { if (!genes.isEmpty()) genes.append(", "); genes.append(q_syn.value(0).toByteArray()); } return qMakePair(gene, "ERROR: is a synonymous symbol of the genes " + genes); } return qMakePair(gene, QByteArray("ERROR: is unknown symbol")); }
void NGSD::annotate(VariantList& variants, QString filename) { initProgress("NGSD annotation", true); //get sample ids QString s_id = sampleId(filename, false); QString ps_id = processedSampleId(filename, false); QString sys_id = getValue("SELECT processing_system_id FROM processed_sample WHERE id='" + processedSampleId(filename, false) + "'").toString(); //check if we could determine the sample bool found_in_db = true; if (s_id=="" || ps_id=="" || sys_id=="") { Log::warn("Could not find processed sample in NGSD by name '" + filename + "'. Annotation will be incomplete because processing system could not be determined!"); found_in_db = false; } //get sample ids that have processed samples with the same processing system (not same sample, variants imported, same processing system, good quality of sample, not tumor) QSet<int> sys_sample_ids; SqlQuery tmp = getQuery(); tmp.exec("SELECT DISTINCT s.id FROM processed_sample as ps, sample s WHERE ps.processing_system_id='" + sys_id + "' AND ps.sample_id=s.id AND s.tumor='0' AND s.quality='good' AND s.id!='" + s_id + "' AND (SELECT count(id) FROM detected_variant as dv WHERE dv.processed_sample_id = ps.id)>0"); while(tmp.next()) { sys_sample_ids.insert(tmp.value(0).toInt()); } //remove all NGSD-specific columns QList<VariantAnnotationHeader> headers = variants.annotations(); foreach(const VariantAnnotationHeader& header, headers) { if (header.name().startsWith("ihdb_")) { removeColumnIfPresent(variants, header.name(), true); } } removeColumnIfPresent(variants, "classification", true); removeColumnIfPresent(variants, "classification_comment", true); removeColumnIfPresent(variants, "validated", true); removeColumnIfPresent(variants, "comment", true); //get required column indices QString num_samples = QString::number(sys_sample_ids.count()); int ihdb_hom_idx = addColumn(variants, "ihdb_hom", "Homozygous variant counts in NGSD for the same processing system (" + num_samples + " samples)."); int ihdb_het_idx = addColumn(variants, "ihdb_het", "Heterozyous variant counts in NGSD for the same processing system (" + num_samples + " samples)."); int ihdb_wt_idx = addColumn(variants, "ihdb_wt", "Wildtype variant counts in NGSD for the same processing system (" + num_samples + " samples)."); int ihdb_all_hom_idx = addColumn(variants, "ihdb_allsys_hom", "Homozygous variant counts in NGSD independent of the processing system."); int ihdb_all_het_idx = addColumn(variants, "ihdb_allsys_het", "Heterozygous variant counts in NGSD independent of the processing system."); int class_idx = addColumn(variants, "classification", "Classification from the NGSD."); int clacom_idx = addColumn(variants, "classification_comment", "Classification comment from the NGSD."); int valid_idx = addColumn(variants, "validated", "Validation information from the NGSD. Validation results of other samples are listed in brackets!"); if (variants.annotationIndexByName("comment", true, false)==-1) addColumn(variants, "comment", "Comments from the NGSD. Comments of other samples are listed in brackets!"); int comment_idx = variants.annotationIndexByName("comment", true, false); //(re-)annotate the variants SqlQuery query = getQuery(); for (int i=0; i<variants.count(); ++i) { //QTime timer; //timer.start(); //variant id Variant& v = variants[i]; QByteArray v_id = variantId(v, false).toLatin1(); //variant classification QVariant classification = getValue("SELECT class FROM variant_classification WHERE variant_id='" + v_id + "'", true); if (!classification.isNull()) { v.annotations()[class_idx] = classification.toByteArray().replace("n/a", ""); v.annotations()[clacom_idx] = getValue("SELECT comment FROM variant_classification WHERE variant_id='" + v_id + "'", true).toByteArray().replace("\n", " ").replace("\t", " "); } //int t_v = timer.elapsed(); //timer.restart(); //detected variant infos int dv_id = -1; QByteArray comment = ""; if (found_in_db) { query.exec("SELECT id, comment FROM detected_variant WHERE processed_sample_id='" + ps_id + "' AND variant_id='" + v_id + "'"); if (query.size()==1) { query.next(); dv_id = query.value(0).toInt(); comment = query.value(1).toByteArray(); } } //validation info int vv_id = -1; QByteArray val_status = ""; if (found_in_db) { query.exec("SELECT id, status FROM variant_validation WHERE sample_id='" + s_id + "' AND variant_id='" + v_id + "'"); if (query.size()==1) { query.next(); vv_id = query.value(0).toInt(); val_status = query.value(1).toByteArray().replace("n/a", ""); } } //int t_dv = timer.elapsed(); //timer.restart(); //validation info other samples int tps = 0; int fps = 0; query.exec("SELECT id, status FROM variant_validation WHERE variant_id='"+v_id+"' AND status!='n/a'"); while(query.next()) { if (query.value(0).toInt()==vv_id) continue; if (query.value(1).toByteArray()=="true positive") ++tps; else if (query.value(1).toByteArray()=="false positive") ++fps; } if (tps>0 || fps>0) { if (val_status=="") val_status = "n/a"; val_status += " (" + QByteArray::number(tps) + "xTP, " + QByteArray::number(fps) + "xFP)"; } //int t_val = timer.elapsed(); //timer.restart(); //comments other samples QList<QByteArray> comments; query.exec("SELECT id, comment FROM detected_variant WHERE variant_id='"+v_id+"' AND comment IS NOT NULL"); while(query.next()) { if (query.value(0).toInt()==dv_id) continue; QByteArray tmp = query.value(1).toByteArray().trimmed(); if (tmp!="") comments.append(tmp); } if (comments.size()>0) { if (comment=="") comment = "n/a"; comment += " ("; for (int i=0; i<comments.count(); ++i) { if (i>0) { comment += ", "; } comment += comments[i]; } comment += ")"; } //int t_com = timer.elapsed(); //timer.restart(); //genotype counts int allsys_hom_count = 0; int allsys_het_count = 0; int sys_hom_count = 0; int sys_het_count = 0; QSet<int> s_ids_done; int s_id_int = s_id.toInt(); query.exec("SELECT dv.genotype, ps.sample_id FROM detected_variant as dv, processed_sample ps WHERE dv.processed_sample_id=ps.id AND dv.variant_id='" + v_id + "'"); while(query.next()) { //skip this sample id int current_sample = query.value(1).toInt(); if (current_sample==s_id_int) continue; //skip already seen samples (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues) if (s_ids_done.contains(current_sample)) continue; s_ids_done.insert(current_sample); QByteArray current_geno = query.value(0).toByteArray(); if (current_geno=="hom") { ++allsys_hom_count; if (sys_sample_ids.contains(current_sample)) { ++sys_hom_count; } } else if (current_geno=="het") { ++allsys_het_count; if (sys_sample_ids.contains(current_sample)) { ++sys_het_count; } } } //qDebug() << (v.isSNV() ? "S" : "I") << query.size() << t_v << t_dv << t_val << t_com << timer.elapsed(); v.annotations()[ihdb_all_hom_idx] = QByteArray::number(allsys_hom_count); v.annotations()[ihdb_all_het_idx] = QByteArray::number(allsys_het_count); if (found_in_db) { v.annotations()[ihdb_hom_idx] = QByteArray::number((double)sys_hom_count / sys_sample_ids.count(), 'f', 4); v.annotations()[ihdb_het_idx] = QByteArray::number((double)sys_het_count / sys_sample_ids.count(), 'f', 4); v.annotations()[ihdb_wt_idx] = QByteArray::number((double)(sys_sample_ids.count() - sys_hom_count - sys_het_count) / sys_sample_ids.count(), 'f', 4); v.annotations()[valid_idx] = val_status; v.annotations()[comment_idx] = comment.replace("\n", " ").replace("\t", " "); } else { v.annotations()[ihdb_hom_idx] = "n/a"; v.annotations()[ihdb_het_idx] = "n/a"; v.annotations()[ihdb_wt_idx] = "n/a"; v.annotations()[valid_idx] = "n/a"; v.annotations()[comment_idx] = "n/a"; } emit updateProgress(100*i/variants.count()); } }