void Formatter::format(double lambda, char new_separator, int& n_total, int& n_filtered) throw (FormatterException) { Descriptor* descriptor = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2; char* token = NULL; char data_separator = '\0'; const char* output_prefix = NULL; const char* file_name = NULL; char* o_gwafile_name = NULL; vector<double>* snp_hq = NULL; double maf_filter_value = -numeric_limits<double>::infinity(); double oevar_imp_filter_value = -numeric_limits<double>::infinity(); bool maf_filter = false; bool oevar_imp_filter = false; Column* column = NULL; ofstream ofile_stream; double d_value = 0.0; n_total = 0; n_filtered = 0; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); output_prefix = descriptor->get_property(Descriptor::PREFIX); file_name = descriptor->get_name(); snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ); data_separator = gwafile->get_data_separator(); auxiliary::transform_file_name(&o_gwafile_name, output_prefix, file_name, NULL, true); if (o_gwafile_name == NULL) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 10); } if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){ maf_filter = true; } if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) { oevar_imp_filter = true; } for (columns_it = input_columns.begin(); columns_it != input_columns.end(); columns_it++) { output_columns.push_back(*columns_it); } if (!isnan(lambda)) { if (stderr_column != NULL) { column = new CorrectedStandardErrorColumn(stderr_column, lambda); column->set_header("%s_gc", stderr_column->get_header()); column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive())); output_columns.push_back(column); } if (pvalue_column != NULL) { column = new CorrectedPvalueColumn(pvalue_column, lambda); column->set_header("%s_gc", pvalue_column->get_header()); column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive())); output_columns.push_back(column); } } if ((oevar_imp_column != NULL) && (n_total_column != NULL)) { column = new EffectiveSampleSizeColumn(n_total_column, oevar_imp_column); column->set_header("%s_effective", n_total_column->get_header()); column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive())); output_columns.push_back(column); } if (gwafile->is_order_on()) { if (descriptor->get_reordered_columns_number() > 0) { stable_sort(output_columns.begin(), output_columns.end(), compare_columns); } else { stable_sort(output_columns.begin(), output_columns.end(), compare_columns_by_name); } } ofile_stream.exceptions(ios_base::failbit | ios_base::badbit); ofile_stream.precision(numeric_limits<double>::digits10); try { ofile_stream.open(o_gwafile_name); } catch (ofstream::failure &e) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 11, o_gwafile_name); } try { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { ofile_stream << (*columns_it)->get_header(); while (++columns_it != output_columns.end()) { ofile_stream << new_separator << (*columns_it)->get_header(); } ofile_stream << endl; } if (maf_filter) { if (oevar_imp_filter) { // all filters while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } d_value = maf_column->get_numeric_value(); d_value = d_value > 0.5 ? 1.0 - d_value : d_value; if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) { d_value = oevar_imp_column->get_numeric_value(); if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; } } line_number += 1; } } else { // only maf filter while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } d_value = maf_column->get_numeric_value(); d_value = d_value > 0.5 ? 1.0 - d_value : d_value; if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; } line_number += 1; } } } else if (oevar_imp_filter) { // only oevar_imp filter while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } d_value = oevar_imp_column->get_numeric_value(); if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; } line_number += 1; } } else { // no filters while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; line_number += 1; } } } catch (ofstream::failure &e) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 13, o_gwafile_name); } try { ofile_stream.close(); } catch (ofstream::failure &e) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 12, o_gwafile_name); } n_total = line_number - 2; if (line_length == 0) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { FormatterException new_e(e); e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ColumnException &e) { FormatterException new_e(e); e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (FormatterException &e) { e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw; } }
void Formatter::process_header() throw (FormatterException) { Descriptor* descriptor = NULL; char header_separator = '\0'; char* header = NULL; char* token = NULL; int column_position = 0; const char* column_name = NULL; const char* new_column_name = NULL; Column* column = NULL; if (gwafile == NULL) { return; } try { if (reader.read_line() <= 0) { throw FormatterException("Formatter", "process_header()", __LINE__, 5, gwafile->get_descriptor()->get_name()); } descriptor = gwafile->get_descriptor(); header_separator = gwafile->get_header_separator(); header = *reader.line; token = auxiliary::strtok(&header, header_separator); while (token != NULL) { column = new Column(); new_column_name = descriptor->get_renamed_column(token); if (new_column_name == NULL) { new_column_name = token; } column_name = descriptor->get_default_column(new_column_name, gwafile->is_case_sensitive()); if (column_name != NULL) { if (strcmp(column_name, Descriptor::MARKER) == 0) { } else if (strcmp(column_name, Descriptor::CHR) == 0) { } else if (strcmp(column_name, Descriptor::POSITION) == 0) { } else if (strcmp(column_name, Descriptor::ALLELE1) == 0) { } else if (strcmp(column_name, Descriptor::ALLELE2) == 0) { } else if (strcmp(column_name, Descriptor::STRAND) == 0) { } else if (strcmp(column_name, Descriptor::EFFECT) == 0) { } else if (strcmp(column_name, Descriptor::STDERR) == 0) { stderr_column = column; } else if (strcmp(column_name, Descriptor::PVALUE) == 0) { pvalue_column = column; pvalue_column_pos = column_position; } else if (strcmp(column_name, Descriptor::FREQLABEL) == 0) { maf_column = column; maf_column_pos = column_position; } else if (strcmp(column_name, Descriptor::HWE_PVAL) == 0) { } else if (strcmp(column_name, Descriptor::CALLRATE) == 0) { } else if (strcmp(column_name, Descriptor::N_TOTAL) == 0) { n_total_column = column; } else if (strcmp(column_name, Descriptor::IMPUTED) == 0) { } else if (strcmp(column_name, Descriptor::USED_FOR_IMP) == 0) { } else if (strcmp(column_name, Descriptor::OEVAR_IMP) == 0) { oevar_imp_column = column; oevar_imp_column_pos = column_position; } else if (strcmp(column_name, Descriptor::AVPOSTPROB) == 0) { } } column->set_header(new_column_name); column->set_order(descriptor->get_column_order(new_column_name, gwafile->is_case_sensitive())); input_columns.push_back(column); token = auxiliary::strtok(&header, header_separator); column_position += 1; } } catch (ReaderException &e) { FormatterException new_e(e); new_e.add_message("Formatter", "process_header()", __LINE__, 6, gwafile->get_descriptor()->get_name()); throw new_e; } catch (DescriptorException &e) { FormatterException new_e(e); new_e.add_message("Formatter", "process_header()", __LINE__, 6, gwafile->get_descriptor()->get_name()); throw new_e; } }