void Annotator::annotate_without_map() throw (AnnotatorException) { Descriptor* descriptor = NULL; ofstream ofile_stream; const char* output_prefix = NULL; const char* file_name = NULL; char* output_file_name = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2u; char header_separator = '\0'; char data_separator = '\0'; char region_separator = '\0'; bool regions_append = false; char* token = NULL; char* end_ptr = NULL; int column_position = 0; const char* column_name = NULL; char* marker_token = NULL; char* chr_token = NULL; char* position_token = NULL; int position = 0; vector<double>* deviation = NULL; int deviation_value = 0; IntervalTree<char*>* genes_index = NULL; IntervalTree<char*> genes_index_subset; map<int, vector<char*>*> annotated_genes; map<int, vector<char*>*>::iterator annotated_genes_it; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); output_prefix = descriptor->get_property(Descriptor::PREFIX); file_name = descriptor->get_name(); header_separator = gwafile->get_header_separator(); data_separator = gwafile->get_data_separator(); deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION); regions_append = gwafile->is_regions_append_on(); auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true); if (output_file_name == NULL) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 16); } if (data_separator == ',') { region_separator = ';'; } else { region_separator = ','; } ofile_stream.exceptions(ios_base::failbit | ios_base::badbit); try { ofile_stream.open(output_file_name); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 17, output_file_name); } try { if (regions_append) { ofile_stream << header_backup; } else { ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_column(Descriptor::CHR)) != NULL ? column_name : Descriptor::CHR); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL ? column_name : Descriptor::POSITION); } for (unsigned int i = 0u; i < deviation->size(); ++i) { deviation_value = (int)deviation->at(i); if (deviation_value != 0) { ofile_stream << header_separator << "+/-" << deviation_value; } else { ofile_stream << header_separator << "IN"; } } ofile_stream << endl; while ((line_length = reader.read_line()) > 0) { line = *reader.line; if (regions_append) { ofile_stream << line; } column_position = 0; marker_token = NULL; chr_token = NULL; position_token = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == marker_column_pos) { marker_token = token; } else if (column_position == chr_column_pos) { chr_token = token; } else if (column_position == position_column_pos) { position_token = token; } token = auxiliary::strtok(&line, data_separator); ++column_position; } if (column_position < total_columns) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } else if (column_position > total_columns) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } position = (int)strtol(position_token, &end_ptr, 10); if (*end_ptr != '\0') { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 10, position_token, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, line_number); } if (position < 0) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 11, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, position, line_number); } if (!regions_append) { ofile_stream << marker_token << data_separator << chr_token << data_separator << position_token; } regions_indices_it = regions_indices.find(chr_token); if (regions_indices_it != regions_indices.end()) { genes_index = regions_indices_it->second; deviation_value = (int)deviation->back(); genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset); for (int i = deviation->size() - 2; i >= 0; --i) { deviation_value = (int)deviation->at(i); genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value); } genes_index_subset.get_marked_values(annotated_genes); if (annotated_genes.size() > 0) { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator; annotated_genes_it = annotated_genes.find(((int)deviation->at(i))); if (annotated_genes_it != annotated_genes.end()) { write_char_vector(ofile_stream, annotated_genes_it->second, region_separator); } else { ofile_stream << "NA"; } } ofile_stream << endl; } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } annotated_genes_it = annotated_genes.begin(); while(annotated_genes_it != annotated_genes.end()) { delete annotated_genes_it->second; annotated_genes_it++; } annotated_genes.clear(); genes_index_subset.clear(); } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } ++line_number; } } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 19, output_file_name); } try { ofile_stream.close(); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 18, output_file_name); } if (line_length == 0) { throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ReaderException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (AnnotatorException &e) { e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw; } }
void Annotator::annotate_with_map() throw (AnnotatorException) { Descriptor* descriptor = NULL; ofstream ofile_stream; const char* output_prefix = NULL; const char* file_name = NULL; char* output_file_name = NULL; char* line = NULL; int line_length = 0; unsigned int line_number = 2u; char* line_backup = NULL; char header_separator = '\0'; char data_separator = '\0'; char region_separator = '\0'; bool regions_append = false; char* token = NULL; int column_position = 0; const char* column_name = NULL; marker_index key_marker_index; marker_index* found_marker_index = NULL; unsigned int found_marker_index_pos = 0u; unsigned int index = 0u; char* chr = NULL; int position = 0; vector<double>* deviation = NULL; int deviation_value = 0; IntervalTree<char*>* genes_index = NULL; IntervalTree<char*> genes_index_subset; map<int, vector<char*>*> annotated_genes; map<int, vector<char*>*>::iterator annotated_genes_it; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); output_prefix = descriptor->get_property(Descriptor::PREFIX); file_name = descriptor->get_name(); header_separator = gwafile->get_header_separator(); data_separator = gwafile->get_data_separator(); deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION); regions_append = gwafile->is_regions_append_on(); auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true); if (output_file_name == NULL) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 16); } if (regions_append) { line_backup = (char*)malloc(reader.get_buffer_size() * sizeof(char)); if (line_backup == NULL) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 2, (reader.get_buffer_size() * sizeof(char))); } } if (data_separator == ',') { region_separator = ';'; } else { region_separator = ','; } ofile_stream.exceptions(ios_base::failbit | ios_base::badbit); try { ofile_stream.open(output_file_name); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 17, output_file_name); } try { if (regions_append) { ofile_stream << header_backup; ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION); } else { ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR); ofile_stream << header_separator; ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION); } for (unsigned int i = 0u; i < deviation->size(); ++i) { deviation_value = (int)deviation->at(i); if (deviation_value != 0) { ofile_stream << header_separator << "+/-" << deviation_value; } else { ofile_stream << header_separator << "IN"; } } ofile_stream << endl; while ((line_length = reader.read_line()) > 0) { line = *reader.line; if (regions_append) { strcpy(line_backup, line); } column_position = 0; key_marker_index.name = NULL; token = auxiliary::strtok(&line, data_separator); while (token != NULL) { if (column_position == marker_column_pos) { key_marker_index.name = token; } token = auxiliary::strtok(&line, data_separator); ++column_position; } if (column_position < total_columns) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } else if (column_position > total_columns) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns); } found_marker_index = (marker_index*)bsearch(&key_marker_index, map_index, map_index_size, sizeof(marker_index), qsort_marker_index_cmp); if (found_marker_index == NULL) { if (regions_append) { ofile_stream << line_backup << data_separator << "NA" << data_separator << "NA"; } else { ofile_stream << key_marker_index.name << data_separator << "NA" << data_separator << "NA"; } for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } else { found_marker_index_pos = found_marker_index - map_index; while ((found_marker_index_pos < map_index_size) && (auxiliary::strcmp_ignore_case(key_marker_index.name, map_index[found_marker_index_pos].name) == 0)) { index = map_index[found_marker_index_pos].index; chr = map_chromosomes[index]; position = map_positions[index]; if (regions_append) { ofile_stream << line_backup << data_separator << chr << data_separator << position; } else { ofile_stream << key_marker_index.name << data_separator << chr << data_separator << position; } regions_indices_it = regions_indices.find(chr); if (regions_indices_it != regions_indices.end()) { genes_index = regions_indices_it->second; deviation_value = (int)deviation->back(); genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset); for (int i = deviation->size() - 2; i >= 0; --i) { deviation_value = (int)deviation->at(i); genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value); } genes_index_subset.get_marked_values(annotated_genes); if (annotated_genes.size() > 0) { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator; annotated_genes_it = annotated_genes.find(((int)deviation->at(i))); if (annotated_genes_it != annotated_genes.end()) { write_char_vector(ofile_stream, annotated_genes_it->second, region_separator); } else { ofile_stream << "NA"; } } ofile_stream << endl; } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } annotated_genes_it = annotated_genes.begin(); while(annotated_genes_it != annotated_genes.end()) { delete annotated_genes_it->second; annotated_genes_it++; } annotated_genes.clear(); genes_index_subset.clear(); } else { for (unsigned int i = 0u; i < deviation->size(); ++i) { ofile_stream << data_separator << "NA"; } ofile_stream << endl; } ++found_marker_index_pos; } } ++line_number; } } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 19, output_file_name); } try { ofile_stream.close(); } catch (ofstream::failure &e) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 18, output_file_name); } if (line_backup != NULL) { free(line_backup); line_backup = NULL; } if (line_length == 0) { throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name()); } } catch (DescriptorException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (ReaderException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw new_e; } catch (AnnotatorException &e) { e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name()); throw; } }
void Annotator::process_header_with_map() throw (AnnotatorException) { Descriptor* descriptor = NULL; char header_separator = '\0'; char* header = NULL; char* token = NULL; int column_position = 0; const char* column_name = NULL; bool regions_append = false; if (gwafile == NULL) { return; } try { descriptor = gwafile->get_descriptor(); header_separator = gwafile->get_header_separator(); regions_append = gwafile->is_regions_append_on(); if (reader.read_line() <= 0) { throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 5, 1, gwafile->get_descriptor()->get_name()); } header = *reader.line; if (regions_append) { header_backup = (char*)malloc((strlen(header) + 1u) * sizeof(char)); if (header_backup == NULL) { throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 2, ((strlen(header) + 1u) * sizeof(char))); } strcpy(header_backup, header); } total_columns = numeric_limits<int>::min(); marker_column_pos = numeric_limits<int>::min(); token = auxiliary::strtok(&header, header_separator); while (token != NULL) { column_name = descriptor->get_default_column(token, gwafile->is_case_sensitive()); if (column_name != NULL) { if (strcmp(column_name, Descriptor::MARKER) == 0) { marker_column_pos = column_position; } } token = auxiliary::strtok(&header, header_separator); ++column_position; } total_columns = column_position; if (marker_column_pos < 0) { throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 7, ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL) ? column_name : Descriptor::MARKER, gwafile->get_descriptor()->get_name()); } } catch (ReaderException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "process_header_with_map()", __LINE__, 6, gwafile->get_descriptor()->get_name()); throw new_e; } catch (DescriptorException &e) { AnnotatorException new_e(e); new_e.add_message("Annotator", "process_header_with_map()", __LINE__, 6, gwafile->get_descriptor()->get_name()); throw new_e; } }