/// Parses a textual region description into a BedLine.
///
/// Accepted notations (all are normalized to tab-separated fields first):
///  - "[c]\t[s]\t[e]"
///  - "[c]:[s]-[e]"
///  - "[c] [s] [e]" (any number of spaces between fields)
/// Commas are removed beforehand, so numbers may contain thousands separators.
///
/// @param str Region string to parse.
/// @return The parsed line, or a default-constructed BedLine if fewer than three
///         fields are found or anything throws while converting the fields.
BedLine BedLine::fromString(QString str)
{
	//normalize (QString::replace works in-place)
	str.replace(',', ""); //remove thousands separator
	str.replace(':', '\t'); //also accept "[c]:[s]-[e]"
	str.replace('-', '\t');
	str.replace(QRegExp("[ ]+"), "\t"); //also accept "[c] [s] [e]" (with any number of spaces)

	//split and convert - every failure ends up at the common fallback below
	QStringList fields = str.split('\t');
	if (fields.count()>=3)
	{
		try
		{
			return BedLine(fields[0], Helper::toInt(fields[1]), Helper::toInt(fields[2]));
		}
		catch(...)
		{
			//conversion failed > use fallback
		}
	}

	return BedLine();
}
/// Loads the content of a BED file, replacing whatever this instance contained before.
///
/// Empty lines and header lines starting with "track " or "browser " are skipped.
/// Each remaining line is split at tabs: the first three fields are chromosome, start
/// and end; all further fields are stored as annotations of the line.
/// The start position is stored incremented by one (BED start coordinates are 0-based,
/// so this presumably converts to the 1-based convention used by BedLine - confirm).
///
/// @param filename File to read (opened via Helper::openFileForReading).
/// @throws FileParseException if a line has fewer than three fields or an empty
///         chromosome field. Helper::toInt is handed a field name and the line,
///         presumably to report non-numeric positions - confirm against Helper.
void BedFile::load(QString filename)
{
	clear();

	//parse from stream
	QSharedPointer<QFile> file = Helper::openFileForReading(filename, true);
	while(!file->atEnd())
	{
		//read line and strip all trailing line-break characters
		QByteArray line = file->readLine();
		while (line.endsWith('\n') || line.endsWith('\r')) line.chop(1);

		//skip empty lines and header lines
		if(line.length()==0 || line.startsWith("track ") || line.startsWith("browser ")) continue;

		//error when less than 3 fields
		QList<QByteArray> fields = line.split('\t');
		if (fields.count()<3)
		{
			THROW(FileParseException, "BED file line with less than three fields found: '" + line.trimmed() + "'");
		}

		//error when chromosome is empty
		//(report the offending line - the chromosome field itself is empty and carries no information)
		if (fields[0].isEmpty())
		{
			THROW(FileParseException, "Empty BED file chromosome field in line '" + line.trimmed() + "'!");
		}

		//error on position conversion
		int start_pos = Helper::toInt(fields[1], "start position", line);
		int end_pos = Helper::toInt(fields[2], "end position", line);

		//create line (all fields after the third are kept as annotations)
		QStringList annos;
		for (int i=3; i<fields.count(); ++i)
		{
			annos.append(fields[i]);
		}
		append(BedLine(fields[0], start_pos+1, end_pos, annos));
	}
}
void GeneSelectorDialog::updateGeneTable() { //clear details ui->details->clearContents(); //convert input to gene list QStringList genes = NGSHelper::textToGenes(ui->genes->toPlainText()); if (genes.isEmpty()) return; //set cursor QApplication::setOverrideCursor(QCursor(Qt::BusyCursor)); ui->details->blockSignals(true); //otherwise itemChanged is emitted //check for CNA results QString folder = QFileInfo(bam_file_).absolutePath(); QStringList files = Helper::findFiles(folder, "*_cnvs.seg", false); bool cna_result_present = (files.count()==1); //load CNA results BedFile cna_results; if (cna_result_present) { auto f = Helper::openFileForReading(files[0]); while(!f->atEnd()) { QString line = f->readLine(); //skip headers if (line.isEmpty() || line[0]!='\t') { continue; } //parse content QStringList parts = line.split('\t'); if (parts.count()<6) THROW(FileParseException, "SEG file line invalid: " + line); Chromosome chr(parts[1]); int start = Helper::toInt(parts[2], "SEG start position", line); int end = Helper::toInt(parts[3], "SEG end position", line); cna_results.append(BedLine(chr, start, end, QStringList() << parts[5])); } } //load low-coverage file for processing system files = Helper::findFiles(folder, "*_lowcov.bed", false); if(files.count()!=1) { updateError("Gene selection error", "Low-coverage BED file not found in " + folder); return; } BedFile sys_gaps; sys_gaps.load(files[0]); //load processing system target region NGSD db; QString sys_file = db.getProcessingSystem(bam_file_, NGSD::FILE); if (sys_file=="") { updateError("Gene selection error", "Processing system target region BED file not found in " + folder); return; } BedFile sys_roi; sys_roi.load(sys_file); //display genes ui->details->setRowCount(genes.count()); for (int r=0; r<genes.count(); ++r) { //convert gene to approved symbol QString gene = genes[r]; int gene_id = db.geneToApprovedID(gene); if(gene_id==-1) { updateError("Gene selection error", "Gene symbol '" + gene + "' is not an approved 
symbol!"); return; } gene = db.geneSymbol(gene_id); setGeneTableItem(r, 0, gene, Qt::AlignLeft, Qt::ItemIsUserCheckable|Qt::ItemIsEnabled); //transcript Transcript transcript = db.longestCodingTranscript(gene_id, Transcript::CCDS); if (!transcript.isValid()) //fallback to UCSC when no CCDS transcript is defined for the gene { transcript = db.longestCodingTranscript(gene_id, Transcript::UCSC); } BedFile region = transcript.regions(); setGeneTableItem(r, 1, transcript.name() + " (" + QString::number(region.count()) + " exons)"); //size region.extend(5); region.merge(); long long bases = region.baseCount(); setGeneTableItem(r, 2, QString::number(bases), Qt::AlignRight); //calculate gaps inside target region BedFile gaps = sys_gaps; gaps.intersect(region); //add target region bases not covered by processing system target file BedFile uncovered(region); uncovered.subtract(sys_roi); gaps.add(uncovered); gaps.merge(); //output (absolute and percentage) long long gap_bases = gaps.baseCount(); setGeneTableItem(r, 3, QString::number(gap_bases), Qt::AlignRight); setGeneTableItem(r, 4, QString::number(100.0 * gap_bases / bases, 'f', 2), Qt::AlignRight); //cnvs + cnv gaps if (!cna_result_present) { setGeneTableItem(r, 5, "n/a", Qt::AlignRight); setGeneTableItem(r, 6, "n/a", Qt::AlignRight); } else { BedFile cnv_data = cna_results; cnv_data.overlapping(region); cnv_data.sort(); int cnv_del = 0; int cnv_dup = 0; int cnv_bad_qc = 0; for(int i=0; i<cnv_data.count(); ++i) { QString cn = cnv_data[i].annotations()[0]; bool ok = false; int cn_num = cn.toInt(&ok); if (!ok) { ++cnv_bad_qc; } else if(cn_num<2) { ++cnv_del; } else if(cn_num>2) { ++cnv_dup; } } QStringList parts; if (cnv_del) parts << QString::number(cnv_del) + " del"; if (cnv_dup) parts << QString::number(cnv_dup) + " dup"; setGeneTableItem(r, 5, parts.join(", "), Qt::AlignRight, Qt::ItemIsUserCheckable|Qt::ItemIsEnabled); BedFile region_covered = region; region_covered.overlapping(cnv_data); int cnv_gaps = region.count() 
- region_covered.count(); parts.clear(); if (cnv_bad_qc) parts << QString::number(cnv_bad_qc) + " bad qc"; if (cnv_gaps) parts << QString::number(cnv_gaps) + " not covered"; setGeneTableItem(r, 6, parts.join(", "), Qt::AlignRight); } } //resize ui->details->resizeColumnsToContents(); ui->details->resizeRowsToContents(); //reset cursor QApplication::restoreOverrideCursor(); ui->details->blockSignals(false); updateSelectedGenesStatistics(); }