Ejemplo n.º 1
0
void Formatter::format(double lambda, char new_separator, int& n_total, int& n_filtered) throw (FormatterException) {
	Descriptor* descriptor = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2;
	char* token = NULL;
	char data_separator = '\0';

	const char* output_prefix = NULL;
	const char* file_name = NULL;
	char* o_gwafile_name = NULL;

	vector<double>* snp_hq = NULL;
	double maf_filter_value = -numeric_limits<double>::infinity();
	double oevar_imp_filter_value = -numeric_limits<double>::infinity();
	bool maf_filter = false;
	bool oevar_imp_filter = false;

	Column* column = NULL;

	ofstream ofile_stream;

	double d_value = 0.0;

	n_total = 0;
	n_filtered = 0;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		output_prefix = descriptor->get_property(Descriptor::PREFIX);
		file_name = descriptor->get_name();
		snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ);
		data_separator = gwafile->get_data_separator();

		auxiliary::transform_file_name(&o_gwafile_name, output_prefix, file_name, NULL, true);
		if (o_gwafile_name == NULL) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 10);
		}

		if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){
			maf_filter = true;
		}

		if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) {
			oevar_imp_filter = true;
		}

		for (columns_it = input_columns.begin(); columns_it != input_columns.end(); columns_it++) {
			output_columns.push_back(*columns_it);
		}

		if (!isnan(lambda)) {
			if (stderr_column != NULL) {
				column = new CorrectedStandardErrorColumn(stderr_column, lambda);
				column->set_header("%s_gc", stderr_column->get_header());
				column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive()));
				output_columns.push_back(column);
			}

			if (pvalue_column != NULL) {
				column = new CorrectedPvalueColumn(pvalue_column, lambda);
				column->set_header("%s_gc", pvalue_column->get_header());
				column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive()));
				output_columns.push_back(column);
			}
		}

		if ((oevar_imp_column != NULL) && (n_total_column != NULL)) {
			column = new EffectiveSampleSizeColumn(n_total_column, oevar_imp_column);
			column->set_header("%s_effective", n_total_column->get_header());
			column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive()));
			output_columns.push_back(column);
		}

		if (gwafile->is_order_on()) {
			if (descriptor->get_reordered_columns_number() > 0) {
				stable_sort(output_columns.begin(), output_columns.end(), compare_columns);
			} else {
				stable_sort(output_columns.begin(), output_columns.end(), compare_columns_by_name);
			}
		}

		ofile_stream.exceptions(ios_base::failbit | ios_base::badbit);
		ofile_stream.precision(numeric_limits<double>::digits10);

		try {
			ofile_stream.open(o_gwafile_name);
		} catch (ofstream::failure &e) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 11, o_gwafile_name);
		}

		try {
			columns_it = output_columns.begin();
			if (columns_it != output_columns.end()) {
				ofile_stream << (*columns_it)->get_header();
				while (++columns_it != output_columns.end()) {
					ofile_stream << new_separator << (*columns_it)->get_header();
				}
				ofile_stream << endl;
			}

			if (maf_filter) {
				if (oevar_imp_filter) {
					// all filters
					while ((line_length = reader.read_line()) > 0) {
						line = *reader.line;

//						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
							token = auxiliary::strtok(&line, data_separator);
							if (token == NULL) {
								(*columns_it)->char_value = "";
							} else {
								auxiliary::trim(&token);
								(*columns_it)->char_value = token;
							}
//							auxiliary::trim(&token);
//							(*columns_it)->char_value = token;
						}

						d_value = maf_column->get_numeric_value();
						d_value = d_value > 0.5 ? 1.0 - d_value : d_value;
						if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
							d_value = oevar_imp_column->get_numeric_value();
							if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
								columns_it = output_columns.begin();
								if (columns_it != output_columns.end()) {
									(*columns_it)->out(ofile_stream);
									while (++columns_it != output_columns.end()) {
										ofile_stream << new_separator;
										(*columns_it)->out(ofile_stream);
									}
									ofile_stream << endl;
								}

								n_filtered += 1;
							}
						}

						line_number += 1;
					}
				} else {
					// only maf filter
					while ((line_length = reader.read_line()) > 0) {
						line = *reader.line;

//						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
							token = auxiliary::strtok(&line, data_separator);
							if (token == NULL) {
								(*columns_it)->char_value = "";
							} else {
								auxiliary::trim(&token);
								(*columns_it)->char_value = token;
							}
//							auxiliary::trim(&token);
//							(*columns_it)->char_value = token;
						}

						d_value = maf_column->get_numeric_value();
						d_value = d_value > 0.5 ? 1.0 - d_value : d_value;
						if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
							columns_it = output_columns.begin();
							if (columns_it != output_columns.end()) {
								(*columns_it)->out(ofile_stream);
								while (++columns_it != output_columns.end()) {
									ofile_stream << new_separator;
									(*columns_it)->out(ofile_stream);
								}
								ofile_stream << endl;
							}

							n_filtered += 1;
						}

						line_number += 1;
					}
				}
			} else if (oevar_imp_filter) {
				// only oevar_imp filter
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

//					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
						token = auxiliary::strtok(&line, data_separator);
						if (token == NULL) {
							(*columns_it)->char_value = "";
						} else {
							auxiliary::trim(&token);
							(*columns_it)->char_value = token;
						}
//						auxiliary::trim(&token);
//						(*columns_it)->char_value = token;
					}

					d_value = oevar_imp_column->get_numeric_value();
					if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
						columns_it = output_columns.begin();
						if (columns_it != output_columns.end()) {
							(*columns_it)->out(ofile_stream);
							while (++columns_it != output_columns.end()) {
								ofile_stream << new_separator;
								(*columns_it)->out(ofile_stream);
							}
							ofile_stream << endl;
						}

						n_filtered += 1;
					}

					line_number += 1;
				}
			} else {
				// no filters
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

//					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
						token = auxiliary::strtok(&line, data_separator);
						if (token == NULL) {
							(*columns_it)->char_value = "";
						} else {
							auxiliary::trim(&token);
							(*columns_it)->char_value = token;
						}
//						auxiliary::trim(&token);
//						(*columns_it)->char_value = token;
					}

					columns_it = output_columns.begin();
					if (columns_it != output_columns.end()) {
						(*columns_it)->out(ofile_stream);
						while (++columns_it != output_columns.end()) {
							ofile_stream << new_separator;
							(*columns_it)->out(ofile_stream);
						}
						ofile_stream << endl;
					}

					n_filtered += 1;

					line_number += 1;
				}
			}
		} catch (ofstream::failure &e) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 13, o_gwafile_name);
		}

		try {
			ofile_stream.close();
		} catch (ofstream::failure &e) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 12, o_gwafile_name);
		}

		n_total = line_number - 2;

		if (line_length == 0) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		FormatterException new_e(e);
		e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ColumnException &e) {
		FormatterException new_e(e);
		e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (FormatterException &e) {
		e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw;
	}
}
Ejemplo n.º 2
0
void Formatter::process_header() throw (FormatterException) {
	Descriptor* descriptor = NULL;
	char header_separator = '\0';
	char* header = NULL;
	char* token = NULL;
	int column_position = 0;
	const char* column_name = NULL;
	const char* new_column_name = NULL;

	Column* column = NULL;

	if (gwafile == NULL) {
		return;
	}

	try {
		if (reader.read_line() <= 0) {
			throw FormatterException("Formatter", "process_header()", __LINE__, 5, gwafile->get_descriptor()->get_name());
		}

		descriptor = gwafile->get_descriptor();
		header_separator = gwafile->get_header_separator();
		header = *reader.line;

		token = auxiliary::strtok(&header, header_separator);
		while (token != NULL) {
			column = new Column();

			new_column_name = descriptor->get_renamed_column(token);
			if (new_column_name == NULL) {
				new_column_name = token;
			}

			column_name = descriptor->get_default_column(new_column_name, gwafile->is_case_sensitive());
			if (column_name != NULL) {
				if (strcmp(column_name, Descriptor::MARKER) == 0) {
				} else if (strcmp(column_name, Descriptor::CHR) == 0) {
				} else if (strcmp(column_name, Descriptor::POSITION) == 0) {
				} else if (strcmp(column_name, Descriptor::ALLELE1) == 0) {
				} else if (strcmp(column_name, Descriptor::ALLELE2) == 0) {
				} else if (strcmp(column_name, Descriptor::STRAND) == 0) {
				} else if (strcmp(column_name, Descriptor::EFFECT) == 0) {
				} else if (strcmp(column_name, Descriptor::STDERR) == 0) {
					stderr_column = column;
				} else if (strcmp(column_name, Descriptor::PVALUE) == 0) {
					pvalue_column = column;
					pvalue_column_pos = column_position;
				} else if (strcmp(column_name, Descriptor::FREQLABEL) == 0) {
					maf_column = column;
					maf_column_pos = column_position;
				} else if (strcmp(column_name, Descriptor::HWE_PVAL) == 0) {
				} else if (strcmp(column_name, Descriptor::CALLRATE) == 0) {
				} else if (strcmp(column_name, Descriptor::N_TOTAL) == 0) {
					n_total_column = column;
				} else if (strcmp(column_name, Descriptor::IMPUTED) == 0) {
				} else if (strcmp(column_name, Descriptor::USED_FOR_IMP) == 0) {
				} else if (strcmp(column_name, Descriptor::OEVAR_IMP) == 0) {
					oevar_imp_column = column;
					oevar_imp_column_pos = column_position;
				} else if (strcmp(column_name, Descriptor::AVPOSTPROB) == 0) {
				}
			}

			column->set_header(new_column_name);
			column->set_order(descriptor->get_column_order(new_column_name, gwafile->is_case_sensitive()));

			input_columns.push_back(column);

			token = auxiliary::strtok(&header, header_separator);
			column_position += 1;
		}
	} catch (ReaderException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "process_header()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (DescriptorException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "process_header()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	}
}