Beispiel #1
0
//Parses a region from a string. Accepted notations are tab-separated
//"[c]\t[s]\t[e]", space-separated "[c] [s] [e]" and "[c]:[s]-[e]"; commas are
//removed before parsing. A default-constructed BedLine is returned if fewer
//than three fields are found or if the position conversion throws.
BedLine BedLine::fromString(QString str)
{
	//normalize every accepted notation to tab-separated fields
	str.remove(QChar(',')); //drop thousands separators
	str.replace(':', '\t'); //"[c]:[s]-[e]" notation: chromosome/start delimiter
	str.replace('-', '\t'); //"[c]:[s]-[e]" notation: start/end delimiter
	str.replace(QRegExp("[ ]+"), "\t"); //"[c] [s] [e]" notation: a run of spaces is one delimiter

	//chromosome, start and end are mandatory
	QStringList fields = str.split('\t');
	if (fields.count()>=3)
	{
		try
		{
			return BedLine(fields[0], Helper::toInt(fields[1]), Helper::toInt(fields[2]));
		}
		catch(...)
		{
			//conversion failed > handled like a malformed string below
		}
	}

	return BedLine();
}
Beispiel #2
0
//Replaces the current content with the regions parsed from 'filename'.
//Empty lines, comment lines (starting with '#') and 'track '/'browser ' header
//lines are skipped. Each remaining line must contain at least three
//tab-separated fields (chromosome, start, end); all further fields are stored
//as annotations of the line.
//Throws FileParseException if a line has less than three fields or an empty
//chromosome field. Helper::toInt is called with a field description and the
//line - presumably to report conversion errors (confirm against Helper).
void BedFile::load(QString filename)
{
	clear();

	//parse from stream
	QSharedPointer<QFile> file = Helper::openFileForReading(filename, true);
	while(!file->atEnd())
	{
		//read line and strip all trailing line-break characters (handles "\n" and "\r\n")
		QByteArray line = file->readLine();
		while (line.endsWith('\n') || line.endsWith('\r')) line.chop(1);

		//skip empty lines, comment lines and header lines
		if(line.isEmpty() || line.startsWith('#') || line.startsWith("track ") || line.startsWith("browser ")) continue;

		//error when less than 3 fields
		QList<QByteArray> fields = line.split('\t');
		if (fields.count()<3)
		{
			THROW(FileParseException, "BED file line with less than three fields found: '" + line.trimmed() + "'");
		}

		//error when chromosome is empty (the line is reported, because the field itself is empty)
		if (fields[0].isEmpty())
		{
			THROW(FileParseException, "Empty BED file chromosome field in line '" + line + "'!");
		}

		//error on position conversion
		int start_pos = Helper::toInt(fields[1], "start position", line);
		int end_pos = Helper::toInt(fields[2], "end position", line);

		//collect optional annotation columns (everything after the end position)
		QStringList annos;
		for (int i=3; i<fields.count(); ++i)
		{
			annos.append(fields[i]);
		}

		//create line - the start is stored incremented by one (presumably 0-based file start > 1-based internal start)
		append(BedLine(fields[0], start_pos+1, end_pos, annos));
	}
}
Beispiel #3
0
//Rebuilds the gene details table from the gene list entered in the dialog.
//One row is written per gene with the following columns:
// 0: approved gene symbol (user-checkable)
// 1: transcript name and number of transcript regions ("exons")
// 2: number of bases of the transcript regions after extend(5) and merge()
// 3: number of gap bases (low-coverage bases inside the regions plus region bases outside the processing system target region)
// 4: gap bases as percentage of column 2 ("n/a" if column 2 is zero)
// 5: number of deletions/duplications from the CNA result file ("n/a" if no result file is available)
// 6: number of CNA segments with non-numeric copy-number and number of regions without CNA data ("n/a" if no result file is available)
//Nothing but clearing the table content is done if the gene list is empty.
//NOTE(review): the error paths below return without restoring the cursor and without
//unblocking signals - presumably updateError() takes care of that; confirm. Exceptions
//raised while parsing the input files are not caught here.
void GeneSelectorDialog::updateGeneTable()
{
	//clear details
	ui->details->clearContents();

	//convert input to gene list
	QStringList genes = NGSHelper::textToGenes(ui->genes->toPlainText());
	if (genes.isEmpty()) return;

	//set cursor
	QApplication::setOverrideCursor(QCursor(Qt::BusyCursor));
	ui->details->blockSignals(true); //otherwise itemChanged is emitted

	//check for CNA results (used only if exactly one matching file is found next to the BAM file)
	QString folder = QFileInfo(bam_file_).absolutePath();
	QStringList files = Helper::findFiles(folder, "*_cnvs.seg", false);
	bool cna_result_present = (files.count()==1);

	//load CNA results
	BedFile cna_results;
	if (cna_result_present)
	{
		auto f = Helper::openFileForReading(files[0]);
		while(!f->atEnd())
		{
			QString line = f->readLine();

			//skip headers (only lines starting with a tab are treated as content)
			if (line.isEmpty() || line[0]!='\t')
			{
				continue;
			}

			//parse content: field 1 is the chromosome, fields 2/3 are start/end, field 5 is kept as annotation (copy-number)
			QStringList parts = line.split('\t');
			if (parts.count()<6) THROW(FileParseException, "SEG file line invalid: " + line);
			Chromosome chr(parts[1]);
			int start = Helper::toInt(parts[2], "SEG start position", line);
			int end = Helper::toInt(parts[3], "SEG end position", line);
			cna_results.append(BedLine(chr, start, end, QStringList() << parts[5]));
		}
	}

	//load low-coverage file for processing system
	files = Helper::findFiles(folder, "*_lowcov.bed", false);
	if(files.count()!=1)
	{
		updateError("Gene selection error", "Low-coverage BED file not found in " + folder);
		return;
	}
	BedFile sys_gaps;
	sys_gaps.load(files[0]);

	//load processing system target region
	NGSD db;
	QString sys_file = db.getProcessingSystem(bam_file_, NGSD::FILE);
	if (sys_file=="")
	{
		updateError("Gene selection error", "Processing system target region BED file not found in " + folder);
		return;
	}
	BedFile sys_roi;
	sys_roi.load(sys_file);

	//display genes
	ui->details->setRowCount(genes.count());
	for (int r=0; r<genes.count(); ++r)
	{
		//convert gene to approved symbol
		QString gene = genes[r];
		int gene_id = db.geneToApprovedID(gene);
		if(gene_id==-1)
		{
			updateError("Gene selection error", "Gene symbol '" + gene + "' is not an approved symbol!");
			return;
		}
		gene = db.geneSymbol(gene_id);
		setGeneTableItem(r, 0, gene, Qt::AlignLeft, Qt::ItemIsUserCheckable|Qt::ItemIsEnabled);

		//transcript
		Transcript transcript = db.longestCodingTranscript(gene_id, Transcript::CCDS);
		if (!transcript.isValid()) //fallback to UCSC when no CCDS transcript is defined for the gene
		{
			transcript = db.longestCodingTranscript(gene_id, Transcript::UCSC);
		}
		BedFile region = transcript.regions();
		setGeneTableItem(r, 1, transcript.name() + " (" + QString::number(region.count()) + " exons)");

		//size
		region.extend(5);
		region.merge();
		long long bases = region.baseCount();
		setGeneTableItem(r, 2, QString::number(bases), Qt::AlignRight);

		//calculate gaps inside target region
		BedFile gaps = sys_gaps;
		gaps.intersect(region);
		//add target region bases not covered by processing system target file
		BedFile uncovered(region);
		uncovered.subtract(sys_roi);
		gaps.add(uncovered);
		gaps.merge();
		//output (absolute and percentage)
		long long gap_bases = gaps.baseCount();
		setGeneTableItem(r, 3, QString::number(gap_bases), Qt::AlignRight);
		if (bases>0)
		{
			setGeneTableItem(r, 4, QString::number(100.0 * gap_bases / bases, 'f', 2), Qt::AlignRight);
		}
		else //no bases (e.g. no transcript regions) > percentage is undefined, avoid showing 'nan'
		{
			setGeneTableItem(r, 4, "n/a", Qt::AlignRight);
		}

		//cnvs + cnv gaps
		if (!cna_result_present)
		{
			setGeneTableItem(r, 5, "n/a", Qt::AlignRight);
			setGeneTableItem(r, 6, "n/a", Qt::AlignRight);
		}
		else
		{
			//determine CNA segments for the gene
			BedFile cnv_data = cna_results;
			cnv_data.overlapping(region);
			cnv_data.sort();

			//classify segments by copy-number: non-numeric > bad QC, below 2 > deletion, above 2 > duplication
			int cnv_del = 0;
			int cnv_dup = 0;
			int cnv_bad_qc = 0;
			for(int i=0; i<cnv_data.count(); ++i)
			{
				QString cn = cnv_data[i].annotations()[0];

				bool ok = false;
				int cn_num = cn.toInt(&ok);
				if (!ok)
				{
					++cnv_bad_qc;
				}
				else if(cn_num<2)
				{
					++cnv_del;
				}
				else if(cn_num>2)
				{
					++cnv_dup;
				}
			}
			QStringList parts;
			if (cnv_del) parts << QString::number(cnv_del) + " del";
			if (cnv_dup) parts << QString::number(cnv_dup) + " dup";
			setGeneTableItem(r, 5, parts.join(", "), Qt::AlignRight, Qt::ItemIsUserCheckable|Qt::ItemIsEnabled);

			//count regions without any CNA segment
			BedFile region_covered = region;
			region_covered.overlapping(cnv_data);
			int cnv_gaps = region.count() - region_covered.count();
			parts.clear();
			if (cnv_bad_qc) parts << QString::number(cnv_bad_qc) + " bad qc";
			if (cnv_gaps) parts << QString::number(cnv_gaps) + " not covered";

			setGeneTableItem(r, 6, parts.join(", "), Qt::AlignRight);
		}
	}

	//resize
	ui->details->resizeColumnsToContents();
	ui->details->resizeRowsToContents();

	//reset cursor
	QApplication::restoreOverrideCursor();
	ui->details->blockSignals(false);
	updateSelectedGenesStatistics();
}