void Annotator::annotate_with_map() throw (AnnotatorException) { Descriptor* descriptor = NULL; ofstream ofile_stream; const char* output_prefix = NULL; const char* file_name = NULL; char* output_file_name = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2u; char* line_backup = NULL; char header_separator = '\0'; char data_separator = '\0'; char region_separator = '\0'; bool regions_append = false; char* token = NULL; int column_position = 0; const char* column_name = NULL; marker_index key_marker_index; marker_index* found_marker_index = NULL; unsigned int found_marker_index_pos = 0u; unsigned int index = 0u; char* chr = NULL; int position = 0; vector<double>* deviation = NULL; int deviation_value = 0; IntervalTree<char*>* genes_index = NULL; IntervalTree<char*> genes_index_subset; map<int, vector<char*>*> annotated_genes; map<int, vector<char*>*>::iterator annotated_genes_it; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); output_prefix = descriptor->get_property(Descriptor::PREFIX); file_name = descriptor->get_name(); header_separator = gwafile->get_header_separator(); data_separator = gwafile->get_data_separator(); deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION); regions_append = gwafile->is_regions_append_on(); auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true); if (output_file_name == NULL) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 16); } if (regions_append) { line_backup = (char*)malloc(reader.get_buffer_size() * sizeof(char)); if (line_backup == NULL) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 2, (reader.get_buffer_size() * sizeof(char))); } } if (data_separator == ',') { region_separator = ';'; } else { region_separator = ','; } ofile_stream.exceptions(ios_base::failbit | ios_base::badbit); try { ofile_stream.open(output_file_name); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 17, output_file_name); } try { if (regions_append) { ofile_stream << header_backup; ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION); } else { ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION); } for (unsigned int i = 0u; i < deviation->size(); ++i) { deviation_value = (int)deviation->at(i); if (deviation_value != 0) { ofile_stream << header_separator << "+/-" << deviation_value; } else { ofile_stream << header_separator << "IN"; } } ofile_stream << endl; while ((line_length = reader.read_line()) > 0) { line = *reader.line; if (regions_append) { strcpy(line_backup, line); } column_position = 0; key_marker_index.name = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == marker_column_pos) { key_marker_index.name = token; } token = auxiliary::strtok(&line, data_separator); ++column_position; } if (column_position < total_columns) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } else if (column_position > total_columns) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } found_marker_index = (marker_index*)bsearch(&key_marker_index, map_index, map_index_size, sizeof(marker_index), qsort_marker_index_cmp); if (found_marker_index == NULL) { if (regions_append) { ofile_stream << line_backup << data_separator << "NA" << data_separator << "NA"; } else { ofile_stream << key_marker_index.name << data_separator << "NA" << data_separator << "NA"; } for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } else { found_marker_index_pos = found_marker_index - map_index; while ((found_marker_index_pos < map_index_size) && (auxiliary::strcmp_ignore_case(key_marker_index.name, map_index[found_marker_index_pos].name) == 0)) { index = map_index[found_marker_index_pos].index; chr = map_chromosomes[index]; position = map_positions[index]; if (regions_append) { ofile_stream << line_backup << data_separator << chr << data_separator << position; } else { ofile_stream << key_marker_index.name << data_separator << chr << data_separator << position; } regions_indices_it = regions_indices.find(chr); if (regions_indices_it != regions_indices.end()) { genes_index = regions_indices_it->second; deviation_value = (int)deviation->back(); genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset); for (int i = deviation->size() - 2; i >= 0; --i) { deviation_value = (int)deviation->at(i); genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value); } genes_index_subset.get_marked_values(annotated_genes); if (annotated_genes.size() > 0) { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator; annotated_genes_it = annotated_genes.find(((int)deviation->at(i))); if (annotated_genes_it != annotated_genes.end()) { write_char_vector(ofile_stream, annotated_genes_it->second, region_separator); } else { ofile_stream << "NA"; } } ofile_stream << endl; } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } annotated_genes_it = annotated_genes.begin(); while(annotated_genes_it != annotated_genes.end()) { delete annotated_genes_it->second; annotated_genes_it++; } annotated_genes.clear(); genes_index_subset.clear(); } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } ++found_marker_index_pos; } } ++line_number; } } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 19, output_file_name); } try { ofile_stream.close(); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 18, output_file_name); } if (line_backup != NULL) { free(line_backup); line_backup = NULL; } if (line_length == 0) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ReaderException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (AnnotatorException &e) { e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw; } }
void Formatter::format(double lambda, char new_separator, int& n_total, int& n_filtered) throw (FormatterException) { Descriptor* descriptor = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2; char* token = NULL; char data_separator = '\0'; const char* output_prefix = NULL; const char* file_name = NULL; char* o_gwafile_name = NULL; vector<double>* snp_hq = NULL; double maf_filter_value = -numeric_limits<double>::infinity(); double oevar_imp_filter_value = -numeric_limits<double>::infinity(); bool maf_filter = false; bool oevar_imp_filter = false; Column* column = NULL; ofstream ofile_stream; double d_value = 0.0; n_total = 0; n_filtered = 0; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); output_prefix = descriptor->get_property(Descriptor::PREFIX); file_name = descriptor->get_name(); snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ); data_separator = gwafile->get_data_separator(); auxiliary::transform_file_name(&o_gwafile_name, output_prefix, file_name, NULL, true); if (o_gwafile_name == NULL) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 10); } if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){ maf_filter = true; } if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) { oevar_imp_filter = true; } for (columns_it = input_columns.begin(); columns_it != input_columns.end(); columns_it++) { output_columns.push_back(*columns_it); } if (!isnan(lambda)) { if (stderr_column != NULL) { column = new CorrectedStandardErrorColumn(stderr_column, lambda); column->set_header("%s_gc", stderr_column->get_header()); column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive())); output_columns.push_back(column); } if (pvalue_column != NULL) { column = new CorrectedPvalueColumn(pvalue_column, lambda); column->set_header("%s_gc", pvalue_column->get_header()); column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive())); output_columns.push_back(column); } } if ((oevar_imp_column != NULL) && (n_total_column != NULL)) { column = new EffectiveSampleSizeColumn(n_total_column, oevar_imp_column); column->set_header("%s_effective", n_total_column->get_header()); column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive())); output_columns.push_back(column); } if (gwafile->is_order_on()) { if (descriptor->get_reordered_columns_number() > 0) { stable_sort(output_columns.begin(), output_columns.end(), compare_columns); } else { stable_sort(output_columns.begin(), output_columns.end(), compare_columns_by_name); } } ofile_stream.exceptions(ios_base::failbit | ios_base::badbit); ofile_stream.precision(numeric_limits<double>::digits10); try { ofile_stream.open(o_gwafile_name); } catch (ofstream::failure &e) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 11, o_gwafile_name); } try { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { ofile_stream << (*columns_it)->get_header(); while (++columns_it != output_columns.end()) { ofile_stream << new_separator << (*columns_it)->get_header(); } ofile_stream << endl; } if (maf_filter) { if (oevar_imp_filter) { // all filters while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } d_value = maf_column->get_numeric_value(); d_value = d_value > 0.5 ? 1.0 - d_value : d_value; if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) { d_value = oevar_imp_column->get_numeric_value(); if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; } } line_number += 1; } } else { // only maf filter while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } d_value = maf_column->get_numeric_value(); d_value = d_value > 0.5 ? 1.0 - d_value : d_value; if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; } line_number += 1; } } } else if (oevar_imp_filter) { // only oevar_imp filter while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } d_value = oevar_imp_column->get_numeric_value(); if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) { columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; } line_number += 1; } } else { // no filters while ((line_length = reader.read_line()) > 0) { line = *reader.line; // for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++); for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) { token = auxiliary::strtok(&line, data_separator); if (token == NULL) { (*columns_it)->char_value = ""; } else { auxiliary::trim(&token); (*columns_it)->char_value = token; } // auxiliary::trim(&token); // (*columns_it)->char_value = token; } columns_it = output_columns.begin(); if (columns_it != output_columns.end()) { (*columns_it)->out(ofile_stream); while (++columns_it != output_columns.end()) { ofile_stream << new_separator; (*columns_it)->out(ofile_stream); } ofile_stream << endl; } n_filtered += 1; line_number += 1; } } } catch (ofstream::failure &e) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 13, o_gwafile_name); } try { ofile_stream.close(); } catch (ofstream::failure &e) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 12, o_gwafile_name); } n_total = line_number - 2; if (line_length == 0) { throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { FormatterException new_e(e); e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ColumnException &e) { FormatterException new_e(e); e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (FormatterException &e) { e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw; } }
void Annotator::annotate_without_map() throw (AnnotatorException) { Descriptor* descriptor = NULL; ofstream ofile_stream; const char* output_prefix = NULL; const char* file_name = NULL; char* output_file_name = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2u; char header_separator = '\0'; char data_separator = '\0'; char region_separator = '\0'; bool regions_append = false; char* token = NULL; char* end_ptr = NULL; int column_position = 0; const char* column_name = NULL; char* marker_token = NULL; char* chr_token = NULL; char* position_token = NULL; int position = 0; vector<double>* deviation = NULL; int deviation_value = 0; IntervalTree<char*>* genes_index = NULL; IntervalTree<char*> genes_index_subset; map<int, vector<char*>*> annotated_genes; map<int, vector<char*>*>::iterator annotated_genes_it; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); output_prefix = descriptor->get_property(Descriptor::PREFIX); file_name = descriptor->get_name(); header_separator = gwafile->get_header_separator(); data_separator = gwafile->get_data_separator(); deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION); regions_append = gwafile->is_regions_append_on(); auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true); if (output_file_name == NULL) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 16); } if (data_separator == ',') { region_separator = ';'; } else { region_separator = ','; } ofile_stream.exceptions(ios_base::failbit | ios_base::badbit); try { ofile_stream.open(output_file_name); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 17, output_file_name); } try { if (regions_append) { ofile_stream << header_backup; } else { ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_column(Descriptor::CHR)) != NULL ? column_name : Descriptor::CHR); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL ? column_name : Descriptor::POSITION); } for (unsigned int i = 0u; i < deviation->size(); ++i) { deviation_value = (int)deviation->at(i); if (deviation_value != 0) { ofile_stream << header_separator << "+/-" << deviation_value; } else { ofile_stream << header_separator << "IN"; } } ofile_stream << endl; while ((line_length = reader.read_line()) > 0) { line = *reader.line; if (regions_append) { ofile_stream << line; } column_position = 0; marker_token = NULL; chr_token = NULL; position_token = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == marker_column_pos) { marker_token = token; } else if (column_position == chr_column_pos) { chr_token = token; } else if (column_position == position_column_pos) { position_token = token; } token = auxiliary::strtok(&line, data_separator); ++column_position; } if (column_position < total_columns) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } else if (column_position > total_columns) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } position = (int)strtol(position_token, &end_ptr, 10); if (*end_ptr != '\0') { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 10, position_token, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, line_number); } if (position < 0) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 11, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, position, line_number); } if (!regions_append) { ofile_stream << marker_token << data_separator << chr_token << data_separator << position_token; } regions_indices_it = regions_indices.find(chr_token); if (regions_indices_it != regions_indices.end()) { genes_index = regions_indices_it->second; deviation_value = (int)deviation->back(); genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset); for (int i = deviation->size() - 2; i >= 0; --i) { deviation_value = (int)deviation->at(i); genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value); } genes_index_subset.get_marked_values(annotated_genes); if (annotated_genes.size() > 0) { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator; annotated_genes_it = annotated_genes.find(((int)deviation->at(i))); if (annotated_genes_it != annotated_genes.end()) { write_char_vector(ofile_stream, annotated_genes_it->second, region_separator); } else { ofile_stream << "NA"; } } ofile_stream << endl; } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } annotated_genes_it = annotated_genes.begin(); while(annotated_genes_it != annotated_genes.end()) { delete annotated_genes_it->second; annotated_genes_it++; } annotated_genes.clear(); genes_index_subset.clear(); } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } ++line_number; } } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 19, output_file_name); } try { ofile_stream.close(); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 18, output_file_name); } if (line_length == 0) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ReaderException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (AnnotatorException &e) { e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw; } }
double Formatter::calculate_lambda(int& n_total, int& n_filtered) throw (FormatterException) { Descriptor* descriptor = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2; char data_separator = '\0'; vector<double>* snp_hq = NULL; double maf_filter_value = -numeric_limits<double>::infinity(); double oevar_imp_filter_value = -numeric_limits<double>::infinity(); bool maf_filter = false; bool oevar_imp_filter = false; char* token = NULL; char* pvalue_token = NULL; char* maf_token = NULL; char* oevar_imp_token = NULL; int column_position = 0; char* end_ptr = NULL; double d_value = 0.0; int n = 0; double* data = NULL; double* new_data = NULL; int current_heap_size = HEAP_SIZE; double lambda = numeric_limits<double>::quiet_NaN(); n_total = 0; n_filtered = 0; if ((gwafile == NULL) || (pvalue_column_pos < 0)) { return lambda; } try { descriptor = gwafile->get_descriptor(); snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ); data_separator = gwafile->get_data_separator(); if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){ maf_filter = true; } if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) { oevar_imp_filter = true; } data = (double*)malloc(HEAP_SIZE * sizeof(double)); if (data == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, HEAP_SIZE * sizeof(double)); } if (maf_filter) { if (oevar_imp_filter) { // all filters while ((line_length = reader.read_line()) > 0) { line = *reader.line; column_position = 0; pvalue_token = NULL; maf_token = NULL; oevar_imp_token = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == pvalue_column_pos) { auxiliary::trim(&token); pvalue_token = token; } else if (column_position == maf_column_pos) { auxiliary::trim(&token); maf_token = token; } else if (column_position == oevar_imp_column_pos) { auxiliary::trim(&token); oevar_imp_token = token; } token = auxiliary::strtok(&line, data_separator); column_position += 1; } if (pvalue_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } if (maf_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, maf_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } if (oevar_imp_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, oevar_imp_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } d_value = R_strtod(maf_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } d_value = d_value > 0.5 ? 1.0 - d_value : d_value; if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) { d_value = R_strtod(oevar_imp_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) { d_value = R_strtod(pvalue_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } n += 1; if (n > current_heap_size) { current_heap_size += HEAP_INCREMENT; new_data = (double*)realloc(data, current_heap_size * sizeof(double)); if (new_data == NULL) { free(data); data = NULL; throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double)); } data = new_data; } data[n - 1] = d_value; } } line_number += 1; } } else { // only maf filter while ((line_length = reader.read_line()) > 0) { line = *reader.line; column_position = 0; pvalue_token = NULL; maf_token = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == pvalue_column_pos) { auxiliary::trim(&token); pvalue_token = token; } else if (column_position == maf_column_pos) { auxiliary::trim(&token); maf_token = token; } token = auxiliary::strtok(&line, data_separator); column_position += 1; } if (pvalue_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } if (maf_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, maf_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } d_value = R_strtod(maf_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } d_value = d_value > 0.5 ? 1.0 - d_value : d_value; if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) { d_value = R_strtod(pvalue_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } n += 1; if (n > current_heap_size) { current_heap_size += HEAP_INCREMENT; new_data = (double*)realloc(data, current_heap_size * sizeof(double)); if (new_data == NULL) { free(data); data = NULL; throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double)); } data = new_data; } data[n - 1] = d_value; } line_number += 1; } } } else if (oevar_imp_filter) { // only oevar_imp_filter while ((line_length = reader.read_line()) > 0) { line = *reader.line; column_position = 0; pvalue_token = NULL; oevar_imp_token = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == pvalue_column_pos) { auxiliary::trim(&token); pvalue_token = token; } else if (column_position == oevar_imp_column_pos) { auxiliary::trim(&token); oevar_imp_token = token; } token = auxiliary::strtok(&line, data_separator); column_position += 1; } if (pvalue_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } if (oevar_imp_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, oevar_imp_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } d_value = R_strtod(oevar_imp_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) { d_value = R_strtod(pvalue_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } n += 1; if (n > current_heap_size) { current_heap_size += HEAP_INCREMENT; new_data = (double*)realloc(data, current_heap_size * sizeof(double)); if (new_data == NULL) { free(data); data = NULL; throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double)); } data = new_data; } data[n - 1] = d_value; } line_number += 1; } } else { // no filters while ((line_length = reader.read_line()) > 0) { line = *reader.line; column_position = 0; pvalue_token = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == pvalue_column_pos) { auxiliary::trim(&token); pvalue_token = token; break; } token = auxiliary::strtok(&line, data_separator); column_position += 1; } if (pvalue_token == NULL) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name()); } d_value = R_strtod(pvalue_token, &end_ptr); if ((*end_ptr != '\0') || (isnan(d_value))) { line_number += 1; continue; } n += 1; if (n > current_heap_size) { current_heap_size += HEAP_INCREMENT; new_data = (double*)realloc(data, current_heap_size * sizeof(double)); if (new_data == NULL) { free(data); data = NULL; throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double)); } data = new_data; } data[n - 1] = d_value; line_number += 1; } } n_filtered = n; n_total = line_number - 2; if (line_length == 0) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name()); } reader.reset(); if (reader.read_line() <= 0) { throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 5, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { FormatterException new_e(e); new_e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ReaderException &e) { FormatterException new_e(e); new_e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name()); throw new_e; } catch (FormatterException &e) { e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name()); throw; } if (n > 0) { for (int i = 0; i < n; i++) { data[i] = pow(Rf_qnorm5(0.5 * data[i], 0.0, 1.0, 0, 0), 2.0); } qsort(data, n, sizeof(double), auxiliary::dblcmp); lambda = auxiliary::stats_median_from_sorted_data(data, n) / Rf_qchisq(0.5, 1.0, 0, 0); } free(data); data = NULL; return lambda; }