예제 #1
0
void Annotator::annotate_without_map() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;

	ofstream ofile_stream;

	const char* output_prefix = NULL;
	const char* file_name = NULL;
	char* output_file_name = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2u;

	char header_separator = '\0';
	char data_separator = '\0';
	char region_separator = '\0';
	bool regions_append = false;

	char* token = NULL;
	char* end_ptr = NULL;

	int column_position = 0;

	const char* column_name = NULL;

	char* marker_token = NULL;
	char* chr_token = NULL;
	char* position_token = NULL;

	int position = 0;

	vector<double>* deviation = NULL;
	int deviation_value = 0;

	IntervalTree<char*>* genes_index = NULL;
	IntervalTree<char*> genes_index_subset;

	map<int, vector<char*>*> annotated_genes;
	map<int, vector<char*>*>::iterator annotated_genes_it;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		output_prefix = descriptor->get_property(Descriptor::PREFIX);
		file_name = descriptor->get_name();
		header_separator = gwafile->get_header_separator();
		data_separator = gwafile->get_data_separator();
		deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION);
		regions_append = gwafile->is_regions_append_on();

		auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true);
		if (output_file_name == NULL) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 16);
		}

		if (data_separator == ',') {
			region_separator = ';';
		} else {
			region_separator = ',';
		}

		ofile_stream.exceptions(ios_base::failbit | ios_base::badbit);

		try {
			ofile_stream.open(output_file_name);
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 17, output_file_name);
		}

		try {
			if (regions_append) {
				ofile_stream << header_backup;
			} else {
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::CHR)) != NULL ? column_name : Descriptor::CHR);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL ? column_name : Descriptor::POSITION);
			}
			for (unsigned int i = 0u; i < deviation->size(); ++i) {
				deviation_value = (int)deviation->at(i);
				if (deviation_value != 0) {
					ofile_stream << header_separator << "+/-" << deviation_value;
				} else {
					ofile_stream << header_separator << "IN";
				}
			}
			ofile_stream << endl;

			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				if (regions_append) {
					ofile_stream << line;
				}

				column_position = 0;
				marker_token = NULL;
				chr_token = NULL;
				position_token = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == marker_column_pos) {
						marker_token = token;
					} else if (column_position == chr_column_pos) {
						chr_token = token;
					} else if (column_position == position_column_pos) {
						position_token = token;
					}
					token = auxiliary::strtok(&line, data_separator);
					++column_position;
				}

				if (column_position < total_columns) {
					throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				} else if (column_position > total_columns) {
					throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				}

				position = (int)strtol(position_token, &end_ptr, 10);
				if (*end_ptr != '\0') {
					throw AnnotatorException("Annotator", "annotate_without_map()",  __LINE__, 10, position_token, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, line_number);
				}

				if (position < 0) {
					throw AnnotatorException("Annotator", "annotate_without_map()",  __LINE__, 11, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, position, line_number);
				}

				if (!regions_append) {
					ofile_stream << marker_token << data_separator << chr_token << data_separator << position_token;
				}

				regions_indices_it = regions_indices.find(chr_token);
				if (regions_indices_it != regions_indices.end()) {
					genes_index = regions_indices_it->second;

					deviation_value = (int)deviation->back();
					genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset);
					for (int i = deviation->size() - 2; i >= 0; --i) {
						deviation_value = (int)deviation->at(i);
						genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value);
					}

					genes_index_subset.get_marked_values(annotated_genes);

					if (annotated_genes.size() > 0) {
						for (unsigned int i = 0u; i < deviation->size(); ++i) {
							ofile_stream << data_separator;
							annotated_genes_it = annotated_genes.find(((int)deviation->at(i)));
							if (annotated_genes_it != annotated_genes.end()) {
								write_char_vector(ofile_stream, annotated_genes_it->second, region_separator);
							} else {
								ofile_stream << "NA";
							}
						}
						ofile_stream << endl;
					} else {
						for (unsigned int i = 0u; i < deviation->size(); ++i) {
							ofile_stream << data_separator << "NA";
						}
						ofile_stream << endl;
					}

					annotated_genes_it = annotated_genes.begin();
					while(annotated_genes_it != annotated_genes.end()) {
						delete annotated_genes_it->second;
						annotated_genes_it++;
					}
					annotated_genes.clear();

					genes_index_subset.clear();
				} else {
					for (unsigned int i = 0u; i < deviation->size(); ++i) {
						ofile_stream << data_separator << "NA";
					}
					ofile_stream << endl;
				}

				++line_number;
			}
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 19, output_file_name);
		}

		try {
			ofile_stream.close();
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 18, output_file_name);
		}

		if (line_length == 0) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (AnnotatorException &e) {
		e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw;
	}
}
예제 #2
0
void Annotator::annotate_with_map() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;

	ofstream ofile_stream;

	const char* output_prefix = NULL;
	const char* file_name = NULL;
	char* output_file_name = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2u;
	char* line_backup = NULL;

	char header_separator = '\0';
	char data_separator = '\0';
	char region_separator = '\0';
	bool regions_append = false;

	char* token = NULL;

	int column_position = 0;

	const char* column_name = NULL;

	marker_index key_marker_index;
	marker_index* found_marker_index = NULL;
	unsigned int found_marker_index_pos = 0u;
	unsigned int index = 0u;
	char* chr = NULL;
	int position = 0;

	vector<double>* deviation = NULL;
	int deviation_value = 0;

	IntervalTree<char*>* genes_index = NULL;
	IntervalTree<char*> genes_index_subset;

	map<int, vector<char*>*> annotated_genes;
	map<int, vector<char*>*>::iterator annotated_genes_it;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		output_prefix = descriptor->get_property(Descriptor::PREFIX);
		file_name = descriptor->get_name();
		header_separator = gwafile->get_header_separator();
		data_separator = gwafile->get_data_separator();
		deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION);
		regions_append = gwafile->is_regions_append_on();

		auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true);
		if (output_file_name == NULL) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 16);
		}

		if (regions_append) {
			line_backup = (char*)malloc(reader.get_buffer_size() * sizeof(char));
			if (line_backup == NULL) {
				throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 2, (reader.get_buffer_size() * sizeof(char)));
			}
		}

		if (data_separator == ',') {
			region_separator = ';';
		} else {
			region_separator = ',';
		}

		ofile_stream.exceptions(ios_base::failbit | ios_base::badbit);

		try {
			ofile_stream.open(output_file_name);
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 17, output_file_name);
		}

		try {
			if (regions_append) {
				ofile_stream << header_backup;
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION);
			} else {
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION);
			}
			for (unsigned int i = 0u; i < deviation->size(); ++i) {
				deviation_value = (int)deviation->at(i);
				if (deviation_value != 0) {
					ofile_stream << header_separator << "+/-" << deviation_value;
				} else {
					ofile_stream << header_separator << "IN";
				}
			}
			ofile_stream << endl;

			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				if (regions_append) {
					strcpy(line_backup, line);
				}

				column_position = 0;
				key_marker_index.name = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == marker_column_pos) {
						key_marker_index.name = token;
					}
					token = auxiliary::strtok(&line, data_separator);
					++column_position;
				}

				if (column_position < total_columns) {
					throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				} else if (column_position > total_columns) {
					throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				}

				found_marker_index = (marker_index*)bsearch(&key_marker_index, map_index, map_index_size, sizeof(marker_index), qsort_marker_index_cmp);
				if (found_marker_index == NULL) {
					if (regions_append) {
						ofile_stream << line_backup << data_separator << "NA" << data_separator << "NA";
					} else {
						ofile_stream << key_marker_index.name << data_separator << "NA" << data_separator << "NA";
					}
					for (unsigned int i = 0u; i < deviation->size(); ++i) {
						ofile_stream << data_separator << "NA";
					}
					ofile_stream << endl;
				} else {
					found_marker_index_pos = found_marker_index - map_index;
					while ((found_marker_index_pos < map_index_size) && (auxiliary::strcmp_ignore_case(key_marker_index.name, map_index[found_marker_index_pos].name) == 0)) {
						index = map_index[found_marker_index_pos].index;
						chr = map_chromosomes[index];
						position = map_positions[index];

						if (regions_append) {
							ofile_stream << line_backup << data_separator << chr << data_separator << position;
						} else {
							ofile_stream << key_marker_index.name << data_separator << chr << data_separator << position;
						}

						regions_indices_it = regions_indices.find(chr);
						if (regions_indices_it != regions_indices.end()) {
							genes_index = regions_indices_it->second;

							deviation_value = (int)deviation->back();
							genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset);
							for (int i = deviation->size() - 2; i >= 0; --i) {
								deviation_value = (int)deviation->at(i);
								genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value);
							}

							genes_index_subset.get_marked_values(annotated_genes);

							if (annotated_genes.size() > 0) {
								for (unsigned int i = 0u; i < deviation->size(); ++i) {
									ofile_stream << data_separator;
									annotated_genes_it = annotated_genes.find(((int)deviation->at(i)));
									if (annotated_genes_it != annotated_genes.end()) {
										write_char_vector(ofile_stream, annotated_genes_it->second, region_separator);
									} else {
										ofile_stream << "NA";
									}
								}
								ofile_stream << endl;
							} else {
								for (unsigned int i = 0u; i < deviation->size(); ++i) {
									ofile_stream << data_separator << "NA";
								}
								ofile_stream << endl;
							}

							annotated_genes_it = annotated_genes.begin();
							while(annotated_genes_it != annotated_genes.end()) {
								delete annotated_genes_it->second;
								annotated_genes_it++;
							}
							annotated_genes.clear();

							genes_index_subset.clear();
						} else {
							for (unsigned int i = 0u; i < deviation->size(); ++i) {
								ofile_stream << data_separator << "NA";
							}
							ofile_stream << endl;
						}

						++found_marker_index_pos;
					}
				}

				++line_number;
			}
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 19, output_file_name);
		}

		try {
			ofile_stream.close();
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 18, output_file_name);
		}

		if (line_backup != NULL) {
			free(line_backup);
			line_backup = NULL;
		}

		if (line_length == 0) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (AnnotatorException &e) {
		e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw;
	}
}
예제 #3
0
void Annotator::process_header_with_map() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;
	char header_separator = '\0';
	char* header = NULL;
	char* token = NULL;
	int column_position = 0;
	const char* column_name = NULL;
	bool regions_append = false;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		header_separator = gwafile->get_header_separator();
		regions_append = gwafile->is_regions_append_on();

		if (reader.read_line() <= 0) {
			throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 5, 1, gwafile->get_descriptor()->get_name());
		}

		header = *reader.line;

		if (regions_append) {
			header_backup = (char*)malloc((strlen(header) + 1u) * sizeof(char));
			if (header_backup == NULL) {
				throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 2, ((strlen(header) + 1u) * sizeof(char)));
			}
			strcpy(header_backup, header);
		}

		total_columns = numeric_limits<int>::min();
		marker_column_pos = numeric_limits<int>::min();

		token = auxiliary::strtok(&header, header_separator);
		while (token != NULL) {
			column_name = descriptor->get_default_column(token, gwafile->is_case_sensitive());
			if (column_name != NULL) {
				if (strcmp(column_name, Descriptor::MARKER) == 0) {
					marker_column_pos = column_position;
				}
			}
			token = auxiliary::strtok(&header, header_separator);
			++column_position;
		}

		total_columns = column_position;

		if (marker_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 7, ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL) ? column_name : Descriptor::MARKER, gwafile->get_descriptor()->get_name());
		}
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_header_with_map()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_header_with_map()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	}
}