Exemplo n.º 1
0
void Annotator::open_regions_file() throw (AnnotatorException) {
	if (gwafile == NULL) {
		return;
	}

	try {
		close_regions_file();

		regions_file = gwafile->get_descriptor()->get_property(Descriptor::REGIONS_FILE);
		if (regions_file == NULL) {
			return;
		}

		regions_reader.set_file_name(regions_file);
		regions_reader.open();
	} catch (ReaderException& e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "open_regions_file()", __LINE__, 3, (regions_file != NULL) ? regions_file : "NULL");
		throw new_e;
	}  catch (DescriptorException& e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "open_regions_file()", __LINE__, 3, (regions_file != NULL) ? regions_file : "NULL");
		throw new_e;
	} catch (AnnotatorException& e) {
		e.add_message("Annotator", "open_regions_file()", __LINE__, 3, (regions_file != NULL) ? regions_file : "NULL");
		throw;
	}
}
Exemplo n.º 2
0
void Annotator::open_gwafile(GwaFile* gwafile) throw (AnnotatorException) {
	if (gwafile == NULL) {
		throw AnnotatorException("Annotator", "open_gwafile( GwaFile* )", __LINE__, 0, "gwafile");
	}

	try {
		close_gwafile();

		if (gwafile->get_descriptor()->get_property(Descriptor::MAP_FILE) != NULL) {
			has_map = true;
		} else {
			has_map = false;
		}

		this->gwafile = gwafile;
		reader.set_file_name(gwafile->get_descriptor()->get_full_path());
		reader.open();
	} catch (DescriptorException& e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "open_gwafile( GwaFile* )", __LINE__, 3, gwafile->get_descriptor()->get_full_path());
		throw new_e;
	} catch (ReaderException& e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "open_gwafile( GwaFile* )", __LINE__, 3, gwafile->get_descriptor()->get_full_path());
		throw new_e;
	} catch (AnnotatorException& e) {
		e.add_message("Annotator", "open_gwafile( GwaFile* )", __LINE__, 3, gwafile->get_descriptor()->get_full_path());
		throw;
	}
}
Exemplo n.º 3
0
void Formatter::close_gwafile() throw (FormatterException) {
	try {
		reader.close();
	} catch (ReaderException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "close_gwafile()", __LINE__, 4, gwafile != NULL ? gwafile->get_descriptor()->get_full_path() : "NULL");
		throw new_e;
	}

	gwafile = NULL;

	input_columns.clear();

	for (columns_it = output_columns.begin(); columns_it != output_columns.end(); columns_it++) {
		delete *columns_it;
	}
	output_columns.clear();

	maf_column = NULL;
	pvalue_column = NULL;
	stderr_column = NULL;
	n_total_column = NULL;
	oevar_imp_column = NULL;

	pvalue_column_pos = numeric_limits<int>::min();
	maf_column_pos = numeric_limits<int>::min();
	oevar_imp_column_pos = numeric_limits<int>::min();
}
Exemplo n.º 4
0
/**
 * \brief Get the scene elements of the component.
 * \param e (out) The scene elements.
 */
void bear::gui::static_text::display
( std::list<visual::scene_element>& e ) const
{
  visual::scene_writing new_e
    (left() + m_margin.x, bottom() + m_margin.y, m_writing);
  new_e.get_rendering_attributes().combine(m_text_rendering_attributes);

  e.push_back( new_e );
} // static_text::display()
Exemplo n.º 5
0
 // Apply transformations of the form
 //
 // (ite c (+ k1 a) (+ k2 a)) --> (+ (ite c k1 k2) a)   
 // (ite c (* k1 a) (* k2 a)) --> (* (ite c k1 k2) a)
 //
 // These transformations are useful for bit-vector problems, since
 // they will minimize the number of adders/multipliers/etc
 br_status push_ite(func_decl * f, unsigned num, expr * const * args, expr_ref & result) {
     if (!m().is_ite(f))
         return BR_FAILED;
     expr * c = args[0];
     expr * t = args[1];
     expr * e = args[2];
     func_decl * f_prime = 0;
     expr_ref new_t(m()), new_e(m()), common(m());
     bool first;
     TRACE("push_ite", tout << "unifying:\n" << mk_ismt2_pp(t, m()) << "\n" << mk_ismt2_pp(e, m()) << "\n";);
Exemplo n.º 6
0
void Annotator::close_regions_file() throw (AnnotatorException) {
	try {
		regions_reader.close();
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "close_regions_file()", __LINE__, 4, (regions_file != NULL) ? regions_file : "NULL");
		throw new_e;
	}

	regions_file = NULL;
}
Exemplo n.º 7
0
void Annotator::close_gwafile() throw (AnnotatorException) {
	try {
		reader.close();
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "close_gwafile()", __LINE__, 4, (gwafile != NULL) ? gwafile->get_descriptor()->get_full_path() : "NULL");
		throw new_e;
	}

	regions_indices_it = regions_indices.begin();
	while (regions_indices_it != regions_indices.end()) {
		free(regions_indices_it->first);
		delete regions_indices_it->second;
		regions_indices_it++;
	}
	regions_indices.clear();

	if (map_index != NULL) {
		for (unsigned int i = 0u; i < map_index_size; ++i) {
			free(map_index[i].name);
			map_index[i].name = NULL;
		}
		free(map_index);
		map_index = NULL;
	}

	if (map_chromosomes != NULL) {
		for (unsigned int i = 0u; i < map_index_size; ++i) {
			free(map_chromosomes[i]);
			map_chromosomes[i] = NULL;
		}
		free(map_chromosomes);
		map_chromosomes = NULL;
	}

	if (map_positions != NULL) {
		free(map_positions);
		map_positions = NULL;
	}

	map_index_size = 0u;
	current_map_heap_size = 0u;

	if (header_backup != NULL) {
		free(header_backup);
		header_backup = NULL;
	}

	gwafile = NULL;
	has_map = false;
}
Exemplo n.º 8
0
void MetaNumeric::finalize() throw (MetaException) {
	if (n <= 0) {
		numeric = false;
		free(data);
		data = NULL;
		return;
	}

	if (numeric) {
		mean = auxiliary::stats_mean(data, n);
		sd = auxiliary::stats_sd(data, n, mean);
		skew = auxiliary::stats_skewness(data, n, mean, sd);
		kurtosis = auxiliary::stats_kurtosis(data, n, mean, sd);

		qsort(data, n, sizeof(double), dblcmp);

		median = auxiliary::stats_median_from_sorted_data(data, n);

		for (unsigned int j = 0; j < 9; j++) {
			quantiles[j][1] = auxiliary::stats_quantile_from_sorted_data(data, n, quantiles[j][0]);
		}

		min = data[0];
		max = data[n - 1];

		try {
			if (create_histogram) {
				histogram = Histogram::create(actual_name, data, n, 1000);
				if (histogram != NULL) {
					histogram->set_title(get_description());
				}
			}

			if (create_boxplot) {
				boxplot = Boxplot::create(actual_name, data, n, median);
				if (boxplot != NULL) {
					boxplot->set_quantiles(quantiles[0][1], quantiles[3][1], quantiles[4][1], quantiles[5][1], quantiles[8][1]);
					boxplot->set_title(get_description());
				}
			}
		} catch (PlotException &e) {
			MetaException new_e(e);
			new_e.add_message("MetaNumeric", "finalize()", __LINE__, 4, actual_name != NULL ? actual_name : "NULL");
			throw new_e;
		}
	}

	free(data);
	data = NULL;
}
Exemplo n.º 9
0
void Formatter::open_gwafile(GwaFile* gwafile) throw (FormatterException) {
	if (gwafile == NULL) {
		throw FormatterException("Formatter", "open_gwafile( GwaFile* )", __LINE__, 0, "gwafile");
	}

	try {
		close_gwafile();
		this->gwafile = gwafile;
		reader.set_file_name(gwafile->get_descriptor()->get_full_path());
		reader.open();
	} catch (ReaderException& e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "open_gwafile( GwaFile* )", __LINE__, 3, gwafile->get_descriptor()->get_full_path());
		throw new_e;
	} catch (FormatterException& e) {
		e.add_message("Formatter", "open_gwafile( GwaFile* )", __LINE__, 3, gwafile->get_descriptor()->get_full_path());
		throw;
	}
}
Exemplo n.º 10
0
void Annotator::process_map_file_data() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;
	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2u;

	char data_separator = '\0';

	char* token = NULL;
	char* end_ptr = NULL;

	int column_position = 0;

	const char* map_marker_column = NULL;
	const char* map_chr_column = NULL;
	const char* map_position_column = NULL;

	char* map_marker_token = NULL;
	char* map_chr_token = NULL;
	char* map_position_token = NULL;

	marker_index* map_index_new = NULL;
	char** map_chromosomes_new = NULL;
	int* map_positions_new = NULL;

	if ((gwafile == NULL) || (map_file == NULL)) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		data_separator = gwafile->get_regions_file_data_separator();

		map_marker_column = descriptor->get_property(Descriptor::MAP_MARKER);
		map_chr_column = descriptor->get_property(Descriptor::MAP_CHR);
		map_position_column = descriptor->get_property(Descriptor::MAP_POSITION);

		current_map_heap_size = MAP_HEAP_SIZE;

		map_index = (marker_index*)malloc(current_map_heap_size * sizeof(marker_index));
		if (map_index == NULL) {
			throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 2, (current_map_heap_size * sizeof(marker_index)));
		}

		map_chromosomes = (char**)malloc(current_map_heap_size * sizeof(char*));
		if (map_chromosomes == NULL) {
			throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 2, (current_map_heap_size * sizeof(char*)));
		}

		map_positions = (int*)malloc(current_map_heap_size * sizeof(int));
		if (map_positions == NULL) {
			throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 2, (current_map_heap_size * sizeof(int)));
		}

		while ((line_length = map_reader.read_line()) > 0) {
			line = *map_reader.line;

			column_position = 0;
			map_marker_token = NULL;
			map_chr_token = NULL;
			map_position_token = NULL;
			token = auxiliary::strtok(&line, data_separator);
			while (token != NULL) {
				if (column_position == map_marker_column_pos) {
					map_marker_token = token;
				} else if (column_position == map_chr_column_pos) {
					map_chr_token = token;
				} else if (column_position == map_position_column_pos) {
					map_position_token = token;
				}
				token = auxiliary::strtok(&line, data_separator);
				++column_position;
			}

			if (column_position < map_file_total_columns) {
				throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 8, line_number, map_file, column_position, map_file_total_columns);
			} else if (column_position > map_file_total_columns) {
				throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 9, line_number, map_file, column_position, map_file_total_columns);
			}

			if ((auxiliary::strcmp_ignore_case(map_marker_token, "NA") != 0) &&
					(auxiliary::strcmp_ignore_case(map_chr_token, "NA") != 0) &&
					(auxiliary::strcmp_ignore_case(map_position_token, "NA") != 0)) {
				if (map_index_size >= current_map_heap_size) {
					current_map_heap_size += MAP_HEAP_INCREMENT;

					map_index_new = (marker_index*)realloc(map_index, current_map_heap_size * sizeof(marker_index));
					if (map_index_new == NULL) {
						throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 22);
					}
					map_index = map_index_new;
					map_index_new = NULL;

					map_chromosomes_new = (char**)realloc(map_chromosomes, current_map_heap_size * sizeof(char*));
					if (map_chromosomes_new == NULL) {
						throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 22);
					}
					map_chromosomes = map_chromosomes_new;
					map_chromosomes_new = NULL;

					map_positions_new = (int*)realloc(map_positions, current_map_heap_size * sizeof(int));
					if (map_positions_new == NULL) {
						throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 22);
					}
					map_positions = map_positions_new;
					map_positions_new = NULL;
				}

				map_index[map_index_size].name = (char*)malloc((strlen(map_marker_token) + 1u)  * sizeof(char));
				if (map_index[map_index_size].name == NULL) {
					throw AnnotatorException("Annotator", "process_map_file_data()",  __LINE__, 2, ((strlen(map_marker_token) + 1u) * sizeof(char)));
				}
				strcpy(map_index[map_index_size].name, map_marker_token);

				map_index[map_index_size].index = map_index_size;

				map_chromosomes[map_index_size]  = (char*)malloc((strlen(map_chr_token) + 1u) * sizeof(char));
				if (map_chromosomes[map_index_size]  == NULL) {
					throw AnnotatorException("Annotator", "process_map_file_data()",  __LINE__, 2, ((strlen(map_chr_token) + 1u) * sizeof(char)));
				}
				strcpy(map_chromosomes[map_index_size], map_chr_token);

				map_positions[map_index_size] = (int)strtol(map_position_token, &end_ptr, 10);
				if (*end_ptr != '\0') {
					throw AnnotatorException("Annotator", "process_map_file_data()",  __LINE__, 10, map_position_token, map_position_column, line_number);
				}

				if (map_positions[map_index_size] < 0) {
					throw AnnotatorException("Annotator", "process_map_file_data()",  __LINE__, 11, map_position_column, map_positions[map_index_size], line_number);
				}

				++map_index_size;
			}
			++line_number;
		}

		if (line_length == 0) {
			throw AnnotatorException("Annotator", "process_map_file_data()", __LINE__, 13, line_number, map_file);
		}

		qsort(map_index, map_index_size, sizeof(marker_index), qsort_marker_index_cmp);

	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_map_file_data()", __LINE__, 14, map_file);
		throw new_e;
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_map_file_data()", __LINE__, 14, map_file);
		throw new_e;
	} catch (AnnotatorException &e) {
		e.add_message("Annotator", "process_map_file_data()", __LINE__, 14, map_file);
		throw;
	}
}
Exemplo n.º 11
0
void Annotator::process_regions_file_data()  throw (AnnotatorException) {
	Descriptor* descriptor = NULL;
	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2u;

	char data_separator = '\0';

	char* token = NULL;
	char* end_ptr = NULL;

	int column_position = 0;

	const char* region_name_column = NULL;
	const char* region_chr_column = NULL;
	const char* region_start_column = NULL;
	const char* region_end_column = NULL;

	char* region_name_token = NULL;
	char* region_chr_token = NULL;
	char* region_start_token = NULL;
	char* region_end_token = NULL;

	char* region_name = NULL;
	char* region_chr = NULL;
	int region_start = 0;
	int region_end = 0;

	IntervalTree<char*>* regions_index = NULL;

	if ((gwafile == NULL) || (regions_file == NULL)) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		data_separator = gwafile->get_regions_file_data_separator();

		region_name_column = descriptor->get_property(Descriptor::REGION_NAME);
		region_chr_column = descriptor->get_property(Descriptor::REGION_CHR);
		region_start_column = descriptor->get_property(Descriptor::REGION_START);
		region_end_column = descriptor->get_property(Descriptor::REGION_END);

		while ((line_length = regions_reader.read_line()) > 0) {
			line = *regions_reader.line;

			column_position = 0;
			region_name_token = NULL;
			region_chr_token = NULL;
			region_start_token = NULL;
			region_end_token = NULL;
			token = auxiliary::strtok(&line, data_separator);
			while (token != NULL) {
				if (column_position == region_name_column_pos) {
					region_name_token = token;
				} else if (column_position == region_chr_column_pos) {
					region_chr_token = token;
				} else if (column_position == region_start_column_pos) {
					region_start_token = token;
				} else if (column_position == region_end_column_pos) {
					region_end_token = token;
				}
				token = auxiliary::strtok(&line, data_separator);
				++column_position;
			}

			if (column_position < regions_file_total_columns) {
				throw AnnotatorException("Annotator", "process_regions_file_data()", __LINE__, 8, line_number, regions_file, column_position, regions_file_total_columns);
			} else if (column_position > regions_file_total_columns) {
				throw AnnotatorException("Annotator", "process_regions_file_data()", __LINE__, 9, line_number, regions_file, column_position, regions_file_total_columns);
			}

			region_name = (char*)malloc((strlen(region_name_token) + 1u) * sizeof(char));
			if (region_name == NULL) {
				throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 2, ((strlen(region_name_token) + 1u) * sizeof(char)));
			}
			strcpy(region_name, region_name_token);

			region_start = (int)strtol(region_start_token, &end_ptr, 10);
			if (*end_ptr != '\0') {
				throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 10, region_start_token, region_start_column, line_number);
			}

			if (region_start < 0) {
				throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 11, region_start_column, region_start, line_number);
			}

			region_end = (int)strtol(region_end_token, &end_ptr, 10);
			if (*end_ptr != '\0') {
				throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 10, region_end_token, region_end_column, line_number);
			}

			if (region_end < 0) {
				throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 11, region_end_column, region_end, line_number);
			}

			if (region_start > region_end) {
				throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 12, region_start_column, region_start, region_end, region_end_column, line_number);
			}

			regions_indices_it = regions_indices.find(region_chr_token);
			if (regions_indices_it == regions_indices.end()) {
				regions_index = new IntervalTree<char*>();

				region_chr = (char*)malloc((strlen(region_chr_token) + 1u) * sizeof(char));
				if (region_chr == NULL) {
					throw AnnotatorException("Annotator", "process_regions_file_data()",  __LINE__, 2, ((strlen(region_chr_token) + 1u) * sizeof(char)));
				}
				strcpy(region_chr, region_chr_token);

				regions_indices.insert(pair<char*, IntervalTree<char*>* >(region_chr, regions_index));
			} else {
				regions_index = regions_indices_it->second;
			}

			regions_index->add(region_start, region_end, region_name);
			++line_number;
		}

		if (line_length == 0) {
			throw AnnotatorException("Annotator", "process_regions_file_data()", __LINE__, 13, line_number, regions_file);
		}
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_regions_file_data()", __LINE__, 14, regions_file);
		throw new_e;
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_regions_file_data()", __LINE__, 14, regions_file);
		throw new_e;
	} catch (AnnotatorException &e) {
		e.add_message("Annotator", "process_regions_file_data()", __LINE__, 14, regions_file);
		throw;
	}
}
Exemplo n.º 12
0
void Formatter::process_header() throw (FormatterException) {
	Descriptor* descriptor = NULL;
	char header_separator = '\0';
	char* header = NULL;
	char* token = NULL;
	int column_position = 0;
	const char* column_name = NULL;
	const char* new_column_name = NULL;

	Column* column = NULL;

	if (gwafile == NULL) {
		return;
	}

	try {
		if (reader.read_line() <= 0) {
			throw FormatterException("Formatter", "process_header()", __LINE__, 5, gwafile->get_descriptor()->get_name());
		}

		descriptor = gwafile->get_descriptor();
		header_separator = gwafile->get_header_separator();
		header = *reader.line;

		token = auxiliary::strtok(&header, header_separator);
		while (token != NULL) {
			column = new Column();

			new_column_name = descriptor->get_renamed_column(token);
			if (new_column_name == NULL) {
				new_column_name = token;
			}

			column_name = descriptor->get_default_column(new_column_name, gwafile->is_case_sensitive());
			if (column_name != NULL) {
				if (strcmp(column_name, Descriptor::MARKER) == 0) {
				} else if (strcmp(column_name, Descriptor::CHR) == 0) {
				} else if (strcmp(column_name, Descriptor::POSITION) == 0) {
				} else if (strcmp(column_name, Descriptor::ALLELE1) == 0) {
				} else if (strcmp(column_name, Descriptor::ALLELE2) == 0) {
				} else if (strcmp(column_name, Descriptor::STRAND) == 0) {
				} else if (strcmp(column_name, Descriptor::EFFECT) == 0) {
				} else if (strcmp(column_name, Descriptor::STDERR) == 0) {
					stderr_column = column;
				} else if (strcmp(column_name, Descriptor::PVALUE) == 0) {
					pvalue_column = column;
					pvalue_column_pos = column_position;
				} else if (strcmp(column_name, Descriptor::FREQLABEL) == 0) {
					maf_column = column;
					maf_column_pos = column_position;
				} else if (strcmp(column_name, Descriptor::HWE_PVAL) == 0) {
				} else if (strcmp(column_name, Descriptor::CALLRATE) == 0) {
				} else if (strcmp(column_name, Descriptor::N_TOTAL) == 0) {
					n_total_column = column;
				} else if (strcmp(column_name, Descriptor::IMPUTED) == 0) {
				} else if (strcmp(column_name, Descriptor::USED_FOR_IMP) == 0) {
				} else if (strcmp(column_name, Descriptor::OEVAR_IMP) == 0) {
					oevar_imp_column = column;
					oevar_imp_column_pos = column_position;
				} else if (strcmp(column_name, Descriptor::AVPOSTPROB) == 0) {
				}
			}

			column->set_header(new_column_name);
			column->set_order(descriptor->get_column_order(new_column_name, gwafile->is_case_sensitive()));

			input_columns.push_back(column);

			token = auxiliary::strtok(&header, header_separator);
			column_position += 1;
		}
	} catch (ReaderException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "process_header()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (DescriptorException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "process_header()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	}
}
Exemplo n.º 13
0
void Annotator::process_regions_file_header() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;
	char header_separator = '\0';
	char* header = NULL;
	char* token = NULL;

	int column_position = 0;

	const char* region_name_column = NULL;
	const char* region_chr_column = NULL;
	const char* region_start_column = NULL;
	const char* region_end_column = NULL;

	if  ((gwafile == NULL) || (regions_file == NULL)) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		header_separator = gwafile->get_regions_file_header_separator();

		if (regions_reader.read_line() <= 0) {
			throw AnnotatorException("Annotator", "process_regions_file_header()", __LINE__, 5, 1, regions_file);
		}

		header = *regions_reader.line;

		region_name_column = descriptor->get_property(Descriptor::REGION_NAME);
		region_chr_column = descriptor->get_property(Descriptor::REGION_CHR);
		region_start_column = descriptor->get_property(Descriptor::REGION_START);
		region_end_column = descriptor->get_property(Descriptor::REGION_END);

		regions_file_total_columns = numeric_limits<int>::min();
		region_name_column_pos = numeric_limits<int>::min();
		region_chr_column_pos = numeric_limits<int>::min();
		region_start_column_pos = numeric_limits<int>::min();
		region_end_column_pos = numeric_limits<int>::min();

		token = auxiliary::strtok(&header, header_separator);
		if (gwafile->is_case_sensitive()) {
			while (token != NULL) {
				if (strcmp(token, region_name_column) == 0) {
					region_name_column_pos = column_position;
				} else if (strcmp(token, region_chr_column) == 0) {
					region_chr_column_pos = column_position;
				} else if (strcmp(token, region_start_column) == 0) {
					region_start_column_pos = column_position;
				} else if (strcmp(token, region_end_column) == 0) {
					region_end_column_pos = column_position;
				}
				token = auxiliary::strtok(&header, header_separator);
				++column_position;
			}
		} else {
			while (token != NULL) {
				if (auxiliary::strcmp_ignore_case(token, region_name_column) == 0) {
					region_name_column_pos = column_position;
				} else if (auxiliary::strcmp_ignore_case(token, region_chr_column) == 0) {
					region_chr_column_pos = column_position;
				} else if (auxiliary::strcmp_ignore_case(token, region_start_column) == 0) {
					region_start_column_pos = column_position;
				} else if (auxiliary::strcmp_ignore_case(token, region_end_column) == 0) {
					region_end_column_pos = column_position;
				}
				token = auxiliary::strtok(&header, header_separator);
				++column_position;
			}
		}

		regions_file_total_columns = column_position;

		if (region_name_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_regions_file_header()", __LINE__, 7, (region_name_column != NULL) ? region_name_column : "NULL", regions_file);
		}

		if (region_chr_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_regions_file_header()", __LINE__, 7, (region_chr_column != NULL) ? region_chr_column : "NULL", regions_file);
		}

		if (region_start_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_regions_file_header()", __LINE__, 7, (region_start_column != NULL) ? region_start_column : "NULL", regions_file);
		}

		if (region_end_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_regions_file_header()", __LINE__, 7, (region_end_column != NULL) ? region_end_column : "NULL", regions_file);
		}
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_regions_file_header()", __LINE__, 6, regions_file);
		throw new_e;
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_regions_file_header()", __LINE__, 6, regions_file);
		throw new_e;
	}
}
Exemplo n.º 14
0
double Formatter::calculate_lambda(int& n_total, int& n_filtered) throw (FormatterException) {
	Descriptor* descriptor = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2;
	char data_separator = '\0';

	vector<double>* snp_hq = NULL;
	double maf_filter_value = -numeric_limits<double>::infinity();
	double oevar_imp_filter_value = -numeric_limits<double>::infinity();
	bool maf_filter = false;
	bool oevar_imp_filter = false;

	char* token = NULL;
	char* pvalue_token = NULL;
	char* maf_token = NULL;
	char* oevar_imp_token = NULL;
	int column_position = 0;

	char* end_ptr = NULL;
	double d_value = 0.0;

	int n = 0;
	double* data = NULL;
	double* new_data = NULL;
	int current_heap_size = HEAP_SIZE;

	double lambda = numeric_limits<double>::quiet_NaN();

	n_total = 0;
	n_filtered = 0;

	if ((gwafile == NULL) || (pvalue_column_pos < 0)) {
		return lambda;
	}

	try {
		descriptor = gwafile->get_descriptor();
		snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ);
		data_separator = gwafile->get_data_separator();

		if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){
			maf_filter = true;
		}

		if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) {
			oevar_imp_filter = true;
		}

		data = (double*)malloc(HEAP_SIZE * sizeof(double));
		if (data == NULL) {
			throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, HEAP_SIZE * sizeof(double));
		}

		if (maf_filter) {
			if (oevar_imp_filter) {
				// all filters
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

					column_position = 0;
					pvalue_token = NULL;
					maf_token = NULL;
					oevar_imp_token = NULL;
					token = auxiliary::strtok(&line, data_separator);
					while (token != NULL) {
						if (column_position == pvalue_column_pos) {
							auxiliary::trim(&token);
							pvalue_token = token;
						} else if (column_position == maf_column_pos) {
							auxiliary::trim(&token);
							maf_token = token;
						} else if (column_position == oevar_imp_column_pos) {
							auxiliary::trim(&token);
							oevar_imp_token = token;
						}

						token = auxiliary::strtok(&line, data_separator);
						column_position += 1;
					}

					if (pvalue_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					if (maf_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, maf_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					if (oevar_imp_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, oevar_imp_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					d_value = R_strtod(maf_token, &end_ptr);
					if ((*end_ptr != '\0') || (isnan(d_value))) {
						line_number += 1;
						continue;
					}

					d_value = d_value > 0.5 ? 1.0 - d_value : d_value;

					if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
						d_value = R_strtod(oevar_imp_token, &end_ptr);
						if ((*end_ptr != '\0') || (isnan(d_value))) {
							line_number += 1;
							continue;
						}

						if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
							d_value = R_strtod(pvalue_token, &end_ptr);
							if ((*end_ptr != '\0') || (isnan(d_value))) {
								line_number += 1;
								continue;
							}

							n += 1;
							if (n > current_heap_size) {
								current_heap_size += HEAP_INCREMENT;

								new_data = (double*)realloc(data, current_heap_size * sizeof(double));
								if (new_data == NULL) {
									free(data);
									data = NULL;
									throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
								}
								data = new_data;
							}

							data[n - 1] = d_value;
						}
					}

					line_number += 1;
				}
			} else {
				// only maf filter
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

					column_position = 0;
					pvalue_token = NULL;
					maf_token = NULL;
					token = auxiliary::strtok(&line, data_separator);
					while (token != NULL) {
						if (column_position == pvalue_column_pos) {
							auxiliary::trim(&token);
							pvalue_token = token;
						} else if (column_position == maf_column_pos) {
							auxiliary::trim(&token);
							maf_token = token;
						}

						token = auxiliary::strtok(&line, data_separator);
						column_position += 1;
					}

					if (pvalue_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					if (maf_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, maf_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					d_value = R_strtod(maf_token, &end_ptr);
					if ((*end_ptr != '\0') || (isnan(d_value))) {
						line_number += 1;
						continue;
					}

					d_value = d_value > 0.5 ? 1.0 - d_value : d_value;

					if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
						d_value = R_strtod(pvalue_token, &end_ptr);
						if ((*end_ptr != '\0') || (isnan(d_value))) {
							line_number += 1;
							continue;
						}

						n += 1;
						if (n > current_heap_size) {
							current_heap_size += HEAP_INCREMENT;

							new_data = (double*)realloc(data, current_heap_size * sizeof(double));
							if (new_data == NULL) {
								free(data);
								data = NULL;
								throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
							}
							data = new_data;
						}

						data[n - 1] = d_value;
					}

					line_number += 1;
				}
			}
		} else if (oevar_imp_filter) {
			// only oevar_imp_filter
			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				column_position = 0;
				pvalue_token = NULL;
				oevar_imp_token = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == pvalue_column_pos) {
						auxiliary::trim(&token);
						pvalue_token = token;
					} else if (column_position == oevar_imp_column_pos) {
						auxiliary::trim(&token);
						oevar_imp_token = token;
					}

					token = auxiliary::strtok(&line, data_separator);
					column_position += 1;
				}

				if (pvalue_token == NULL) {
					throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
				}

				if (oevar_imp_token == NULL) {
					throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, oevar_imp_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
				}

				d_value = R_strtod(oevar_imp_token, &end_ptr);
				if ((*end_ptr != '\0') || (isnan(d_value))) {
					line_number += 1;
					continue;
				}

				if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
					d_value = R_strtod(pvalue_token, &end_ptr);
					if ((*end_ptr != '\0') || (isnan(d_value))) {
						line_number += 1;
						continue;
					}

					n += 1;
					if (n > current_heap_size) {
						current_heap_size += HEAP_INCREMENT;

						new_data = (double*)realloc(data, current_heap_size * sizeof(double));
						if (new_data == NULL) {
							free(data);
							data = NULL;
							throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
						}
						data = new_data;
					}

					data[n - 1] = d_value;
				}

				line_number += 1;
			}
		} else {
			// no filters
			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				column_position = 0;
				pvalue_token = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == pvalue_column_pos) {
						auxiliary::trim(&token);
						pvalue_token = token;
						break;
					}

					token = auxiliary::strtok(&line, data_separator);
					column_position += 1;
				}

				if (pvalue_token == NULL) {
					throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
				}

				d_value = R_strtod(pvalue_token, &end_ptr);
				if ((*end_ptr != '\0') || (isnan(d_value))) {
					line_number += 1;
					continue;
				}

				n += 1;
				if (n > current_heap_size) {
					current_heap_size += HEAP_INCREMENT;

					new_data = (double*)realloc(data, current_heap_size * sizeof(double));
					if (new_data == NULL) {
						free(data);
						data = NULL;
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
					}
					data = new_data;
				}

				data[n - 1] = d_value;

				line_number += 1;
			}
		}

		n_filtered = n;
		n_total = line_number - 2;

		if (line_length == 0) {
			throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name());
		}

		reader.reset();
		if (reader.read_line() <= 0) {
			throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 5, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ReaderException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (FormatterException &e) {
		e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name());
		throw;
	}

	if (n > 0) {
		for (int i = 0; i < n; i++) {
			data[i] = pow(Rf_qnorm5(0.5 * data[i], 0.0, 1.0, 0, 0), 2.0);
		}

		qsort(data, n, sizeof(double), auxiliary::dblcmp);

		lambda =  auxiliary::stats_median_from_sorted_data(data, n) / Rf_qchisq(0.5, 1.0, 0, 0);
	}

	free(data);
	data = NULL;

	return lambda;
}
Exemplo n.º 15
0
void Formatter::format(double lambda, char new_separator, int& n_total, int& n_filtered) throw (FormatterException) {
	Descriptor* descriptor = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2;
	char* token = NULL;
	char data_separator = '\0';

	const char* output_prefix = NULL;
	const char* file_name = NULL;
	char* o_gwafile_name = NULL;

	vector<double>* snp_hq = NULL;
	double maf_filter_value = -numeric_limits<double>::infinity();
	double oevar_imp_filter_value = -numeric_limits<double>::infinity();
	bool maf_filter = false;
	bool oevar_imp_filter = false;

	Column* column = NULL;

	ofstream ofile_stream;

	double d_value = 0.0;

	n_total = 0;
	n_filtered = 0;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		output_prefix = descriptor->get_property(Descriptor::PREFIX);
		file_name = descriptor->get_name();
		snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ);
		data_separator = gwafile->get_data_separator();

		auxiliary::transform_file_name(&o_gwafile_name, output_prefix, file_name, NULL, true);
		if (o_gwafile_name == NULL) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 10);
		}

		if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){
			maf_filter = true;
		}

		if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) {
			oevar_imp_filter = true;
		}

		for (columns_it = input_columns.begin(); columns_it != input_columns.end(); columns_it++) {
			output_columns.push_back(*columns_it);
		}

		if (!isnan(lambda)) {
			if (stderr_column != NULL) {
				column = new CorrectedStandardErrorColumn(stderr_column, lambda);
				column->set_header("%s_gc", stderr_column->get_header());
				column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive()));
				output_columns.push_back(column);
			}

			if (pvalue_column != NULL) {
				column = new CorrectedPvalueColumn(pvalue_column, lambda);
				column->set_header("%s_gc", pvalue_column->get_header());
				column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive()));
				output_columns.push_back(column);
			}
		}

		if ((oevar_imp_column != NULL) && (n_total_column != NULL)) {
			column = new EffectiveSampleSizeColumn(n_total_column, oevar_imp_column);
			column->set_header("%s_effective", n_total_column->get_header());
			column->set_order(descriptor->get_column_order(column->get_header(), gwafile->is_case_sensitive()));
			output_columns.push_back(column);
		}

		if (gwafile->is_order_on()) {
			if (descriptor->get_reordered_columns_number() > 0) {
				stable_sort(output_columns.begin(), output_columns.end(), compare_columns);
			} else {
				stable_sort(output_columns.begin(), output_columns.end(), compare_columns_by_name);
			}
		}

		ofile_stream.exceptions(ios_base::failbit | ios_base::badbit);
		ofile_stream.precision(numeric_limits<double>::digits10);

		try {
			ofile_stream.open(o_gwafile_name);
		} catch (ofstream::failure &e) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 11, o_gwafile_name);
		}

		try {
			columns_it = output_columns.begin();
			if (columns_it != output_columns.end()) {
				ofile_stream << (*columns_it)->get_header();
				while (++columns_it != output_columns.end()) {
					ofile_stream << new_separator << (*columns_it)->get_header();
				}
				ofile_stream << endl;
			}

			if (maf_filter) {
				if (oevar_imp_filter) {
					// all filters
					while ((line_length = reader.read_line()) > 0) {
						line = *reader.line;

//						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
							token = auxiliary::strtok(&line, data_separator);
							if (token == NULL) {
								(*columns_it)->char_value = "";
							} else {
								auxiliary::trim(&token);
								(*columns_it)->char_value = token;
							}
//							auxiliary::trim(&token);
//							(*columns_it)->char_value = token;
						}

						d_value = maf_column->get_numeric_value();
						d_value = d_value > 0.5 ? 1.0 - d_value : d_value;
						if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
							d_value = oevar_imp_column->get_numeric_value();
							if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
								columns_it = output_columns.begin();
								if (columns_it != output_columns.end()) {
									(*columns_it)->out(ofile_stream);
									while (++columns_it != output_columns.end()) {
										ofile_stream << new_separator;
										(*columns_it)->out(ofile_stream);
									}
									ofile_stream << endl;
								}

								n_filtered += 1;
							}
						}

						line_number += 1;
					}
				} else {
					// only maf filter
					while ((line_length = reader.read_line()) > 0) {
						line = *reader.line;

//						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
						for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
							token = auxiliary::strtok(&line, data_separator);
							if (token == NULL) {
								(*columns_it)->char_value = "";
							} else {
								auxiliary::trim(&token);
								(*columns_it)->char_value = token;
							}
//							auxiliary::trim(&token);
//							(*columns_it)->char_value = token;
						}

						d_value = maf_column->get_numeric_value();
						d_value = d_value > 0.5 ? 1.0 - d_value : d_value;
						if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
							columns_it = output_columns.begin();
							if (columns_it != output_columns.end()) {
								(*columns_it)->out(ofile_stream);
								while (++columns_it != output_columns.end()) {
									ofile_stream << new_separator;
									(*columns_it)->out(ofile_stream);
								}
								ofile_stream << endl;
							}

							n_filtered += 1;
						}

						line_number += 1;
					}
				}
			} else if (oevar_imp_filter) {
				// only oevar_imp filter
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

//					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
						token = auxiliary::strtok(&line, data_separator);
						if (token == NULL) {
							(*columns_it)->char_value = "";
						} else {
							auxiliary::trim(&token);
							(*columns_it)->char_value = token;
						}
//						auxiliary::trim(&token);
//						(*columns_it)->char_value = token;
					}

					d_value = oevar_imp_column->get_numeric_value();
					if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
						columns_it = output_columns.begin();
						if (columns_it != output_columns.end()) {
							(*columns_it)->out(ofile_stream);
							while (++columns_it != output_columns.end()) {
								ofile_stream << new_separator;
								(*columns_it)->out(ofile_stream);
							}
							ofile_stream << endl;
						}

						n_filtered += 1;
					}

					line_number += 1;
				}
			} else {
				// no filters
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

//					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); (*columns_it)->char_value = auxiliary::strtok(&line, data_separator), columns_it++);
					for (columns_it = input_columns.begin(); columns_it != input_columns.end(); ++columns_it) {
						token = auxiliary::strtok(&line, data_separator);
						if (token == NULL) {
							(*columns_it)->char_value = "";
						} else {
							auxiliary::trim(&token);
							(*columns_it)->char_value = token;
						}
//						auxiliary::trim(&token);
//						(*columns_it)->char_value = token;
					}

					columns_it = output_columns.begin();
					if (columns_it != output_columns.end()) {
						(*columns_it)->out(ofile_stream);
						while (++columns_it != output_columns.end()) {
							ofile_stream << new_separator;
							(*columns_it)->out(ofile_stream);
						}
						ofile_stream << endl;
					}

					n_filtered += 1;

					line_number += 1;
				}
			}
		} catch (ofstream::failure &e) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 13, o_gwafile_name);
		}

		try {
			ofile_stream.close();
		} catch (ofstream::failure &e) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 12, o_gwafile_name);
		}

		n_total = line_number - 2;

		if (line_length == 0) {
			throw FormatterException("Formatter", "format( double , char , int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		FormatterException new_e(e);
		e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ColumnException &e) {
		FormatterException new_e(e);
		e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (FormatterException &e) {
		e.add_message("Formatter", "format( double , char , int& , int& )", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw;
	}
}
Exemplo n.º 16
0
void Annotator::annotate_with_map() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;

	ofstream ofile_stream;

	const char* output_prefix = NULL;
	const char* file_name = NULL;
	char* output_file_name = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2u;
	char* line_backup = NULL;

	char header_separator = '\0';
	char data_separator = '\0';
	char region_separator = '\0';
	bool regions_append = false;

	char* token = NULL;

	int column_position = 0;

	const char* column_name = NULL;

	marker_index key_marker_index;
	marker_index* found_marker_index = NULL;
	unsigned int found_marker_index_pos = 0u;
	unsigned int index = 0u;
	char* chr = NULL;
	int position = 0;

	vector<double>* deviation = NULL;
	int deviation_value = 0;

	IntervalTree<char*>* genes_index = NULL;
	IntervalTree<char*> genes_index_subset;

	map<int, vector<char*>*> annotated_genes;
	map<int, vector<char*>*>::iterator annotated_genes_it;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		output_prefix = descriptor->get_property(Descriptor::PREFIX);
		file_name = descriptor->get_name();
		header_separator = gwafile->get_header_separator();
		data_separator = gwafile->get_data_separator();
		deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION);
		regions_append = gwafile->is_regions_append_on();

		auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true);
		if (output_file_name == NULL) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 16);
		}

		if (regions_append) {
			line_backup = (char*)malloc(reader.get_buffer_size() * sizeof(char));
			if (line_backup == NULL) {
				throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 2, (reader.get_buffer_size() * sizeof(char)));
			}
		}

		if (data_separator == ',') {
			region_separator = ';';
		} else {
			region_separator = ',';
		}

		ofile_stream.exceptions(ios_base::failbit | ios_base::badbit);

		try {
			ofile_stream.open(output_file_name);
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 17, output_file_name);
		}

		try {
			if (regions_append) {
				ofile_stream << header_backup;
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION);
			} else {
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_CHR)) != NULL ? column_name : Descriptor::MAP_CHR);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_property(Descriptor::MAP_POSITION)) != NULL ? column_name : Descriptor::MAP_POSITION);
			}
			for (unsigned int i = 0u; i < deviation->size(); ++i) {
				deviation_value = (int)deviation->at(i);
				if (deviation_value != 0) {
					ofile_stream << header_separator << "+/-" << deviation_value;
				} else {
					ofile_stream << header_separator << "IN";
				}
			}
			ofile_stream << endl;

			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				if (regions_append) {
					strcpy(line_backup, line);
				}

				column_position = 0;
				key_marker_index.name = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == marker_column_pos) {
						key_marker_index.name = token;
					}
					token = auxiliary::strtok(&line, data_separator);
					++column_position;
				}

				if (column_position < total_columns) {
					throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				} else if (column_position > total_columns) {
					throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				}

				found_marker_index = (marker_index*)bsearch(&key_marker_index, map_index, map_index_size, sizeof(marker_index), qsort_marker_index_cmp);
				if (found_marker_index == NULL) {
					if (regions_append) {
						ofile_stream << line_backup << data_separator << "NA" << data_separator << "NA";
					} else {
						ofile_stream << key_marker_index.name << data_separator << "NA" << data_separator << "NA";
					}
					for (unsigned int i = 0u; i < deviation->size(); ++i) {
						ofile_stream << data_separator << "NA";
					}
					ofile_stream << endl;
				} else {
					found_marker_index_pos = found_marker_index - map_index;
					while ((found_marker_index_pos < map_index_size) && (auxiliary::strcmp_ignore_case(key_marker_index.name, map_index[found_marker_index_pos].name) == 0)) {
						index = map_index[found_marker_index_pos].index;
						chr = map_chromosomes[index];
						position = map_positions[index];

						if (regions_append) {
							ofile_stream << line_backup << data_separator << chr << data_separator << position;
						} else {
							ofile_stream << key_marker_index.name << data_separator << chr << data_separator << position;
						}

						regions_indices_it = regions_indices.find(chr);
						if (regions_indices_it != regions_indices.end()) {
							genes_index = regions_indices_it->second;

							deviation_value = (int)deviation->back();
							genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset);
							for (int i = deviation->size() - 2; i >= 0; --i) {
								deviation_value = (int)deviation->at(i);
								genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value);
							}

							genes_index_subset.get_marked_values(annotated_genes);

							if (annotated_genes.size() > 0) {
								for (unsigned int i = 0u; i < deviation->size(); ++i) {
									ofile_stream << data_separator;
									annotated_genes_it = annotated_genes.find(((int)deviation->at(i)));
									if (annotated_genes_it != annotated_genes.end()) {
										write_char_vector(ofile_stream, annotated_genes_it->second, region_separator);
									} else {
										ofile_stream << "NA";
									}
								}
								ofile_stream << endl;
							} else {
								for (unsigned int i = 0u; i < deviation->size(); ++i) {
									ofile_stream << data_separator << "NA";
								}
								ofile_stream << endl;
							}

							annotated_genes_it = annotated_genes.begin();
							while(annotated_genes_it != annotated_genes.end()) {
								delete annotated_genes_it->second;
								annotated_genes_it++;
							}
							annotated_genes.clear();

							genes_index_subset.clear();
						} else {
							for (unsigned int i = 0u; i < deviation->size(); ++i) {
								ofile_stream << data_separator << "NA";
							}
							ofile_stream << endl;
						}

						++found_marker_index_pos;
					}
				}

				++line_number;
			}
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 19, output_file_name);
		}

		try {
			ofile_stream.close();
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 18, output_file_name);
		}

		if (line_backup != NULL) {
			free(line_backup);
			line_backup = NULL;
		}

		if (line_length == 0) {
			throw AnnotatorException("Annotator", "annotate_with_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (AnnotatorException &e) {
		e.add_message("Annotator", "annotate_with_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw;
	}
}
Exemplo n.º 17
0
void Annotator::annotate_without_map() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;

	ofstream ofile_stream;

	const char* output_prefix = NULL;
	const char* file_name = NULL;
	char* output_file_name = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2u;

	char header_separator = '\0';
	char data_separator = '\0';
	char region_separator = '\0';
	bool regions_append = false;

	char* token = NULL;
	char* end_ptr = NULL;

	int column_position = 0;

	const char* column_name = NULL;

	char* marker_token = NULL;
	char* chr_token = NULL;
	char* position_token = NULL;

	int position = 0;

	vector<double>* deviation = NULL;
	int deviation_value = 0;

	IntervalTree<char*>* genes_index = NULL;
	IntervalTree<char*> genes_index_subset;

	map<int, vector<char*>*> annotated_genes;
	map<int, vector<char*>*>::iterator annotated_genes_it;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		output_prefix = descriptor->get_property(Descriptor::PREFIX);
		file_name = descriptor->get_name();
		header_separator = gwafile->get_header_separator();
		data_separator = gwafile->get_data_separator();
		deviation = descriptor->get_threshold(Descriptor::REGIONS_DEVIATION);
		regions_append = gwafile->is_regions_append_on();

		auxiliary::transform_file_name(&output_file_name, output_prefix, file_name, NULL, true);
		if (output_file_name == NULL) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 16);
		}

		if (data_separator == ',') {
			region_separator = ';';
		} else {
			region_separator = ',';
		}

		ofile_stream.exceptions(ios_base::failbit | ios_base::badbit);

		try {
			ofile_stream.open(output_file_name);
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 17, output_file_name);
		}

		try {
			if (regions_append) {
				ofile_stream << header_backup;
			} else {
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL ? column_name : Descriptor::MARKER);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::CHR)) != NULL ? column_name : Descriptor::CHR);
				ofile_stream << header_separator;
				ofile_stream << ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL ? column_name : Descriptor::POSITION);
			}
			for (unsigned int i = 0u; i < deviation->size(); ++i) {
				deviation_value = (int)deviation->at(i);
				if (deviation_value != 0) {
					ofile_stream << header_separator << "+/-" << deviation_value;
				} else {
					ofile_stream << header_separator << "IN";
				}
			}
			ofile_stream << endl;

			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				if (regions_append) {
					ofile_stream << line;
				}

				column_position = 0;
				marker_token = NULL;
				chr_token = NULL;
				position_token = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == marker_column_pos) {
						marker_token = token;
					} else if (column_position == chr_column_pos) {
						chr_token = token;
					} else if (column_position == position_column_pos) {
						position_token = token;
					}
					token = auxiliary::strtok(&line, data_separator);
					++column_position;
				}

				if (column_position < total_columns) {
					throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 8, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				} else if (column_position > total_columns) {
					throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 9, line_number, gwafile->get_descriptor()->get_name(), column_position, total_columns);
				}

				position = (int)strtol(position_token, &end_ptr, 10);
				if (*end_ptr != '\0') {
					throw AnnotatorException("Annotator", "annotate_without_map()",  __LINE__, 10, position_token, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, line_number);
				}

				if (position < 0) {
					throw AnnotatorException("Annotator", "annotate_without_map()",  __LINE__, 11, ((column_name = descriptor->get_column(Descriptor::POSITION)) != NULL) ? column_name : Descriptor::POSITION, position, line_number);
				}

				if (!regions_append) {
					ofile_stream << marker_token << data_separator << chr_token << data_separator << position_token;
				}

				regions_indices_it = regions_indices.find(chr_token);
				if (regions_indices_it != regions_indices.end()) {
					genes_index = regions_indices_it->second;

					deviation_value = (int)deviation->back();
					genes_index->get_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value, genes_index_subset);
					for (int i = deviation->size() - 2; i >= 0; --i) {
						deviation_value = (int)deviation->at(i);
						genes_index_subset.mark_intersecting_intervals(position - deviation_value, position + deviation_value, deviation_value);
					}

					genes_index_subset.get_marked_values(annotated_genes);

					if (annotated_genes.size() > 0) {
						for (unsigned int i = 0u; i < deviation->size(); ++i) {
							ofile_stream << data_separator;
							annotated_genes_it = annotated_genes.find(((int)deviation->at(i)));
							if (annotated_genes_it != annotated_genes.end()) {
								write_char_vector(ofile_stream, annotated_genes_it->second, region_separator);
							} else {
								ofile_stream << "NA";
							}
						}
						ofile_stream << endl;
					} else {
						for (unsigned int i = 0u; i < deviation->size(); ++i) {
							ofile_stream << data_separator << "NA";
						}
						ofile_stream << endl;
					}

					annotated_genes_it = annotated_genes.begin();
					while(annotated_genes_it != annotated_genes.end()) {
						delete annotated_genes_it->second;
						annotated_genes_it++;
					}
					annotated_genes.clear();

					genes_index_subset.clear();
				} else {
					for (unsigned int i = 0u; i < deviation->size(); ++i) {
						ofile_stream << data_separator << "NA";
					}
					ofile_stream << endl;
				}

				++line_number;
			}
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 19, output_file_name);
		}

		try {
			ofile_stream.close();
		} catch (ofstream::failure &e) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 18, output_file_name);
		}

		if (line_length == 0) {
			throw AnnotatorException("Annotator", "annotate_without_map()", __LINE__, 13, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (AnnotatorException &e) {
		e.add_message("Annotator", "annotate_without_map()", __LINE__, 14, gwafile->get_descriptor()->get_name());
		throw;
	}
}
Exemplo n.º 18
0
void Annotator::process_header_with_map() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;
	char header_separator = '\0';
	char* header = NULL;
	char* token = NULL;
	int column_position = 0;
	const char* column_name = NULL;
	bool regions_append = false;

	if (gwafile == NULL) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		header_separator = gwafile->get_header_separator();
		regions_append = gwafile->is_regions_append_on();

		if (reader.read_line() <= 0) {
			throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 5, 1, gwafile->get_descriptor()->get_name());
		}

		header = *reader.line;

		if (regions_append) {
			header_backup = (char*)malloc((strlen(header) + 1u) * sizeof(char));
			if (header_backup == NULL) {
				throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 2, ((strlen(header) + 1u) * sizeof(char)));
			}
			strcpy(header_backup, header);
		}

		total_columns = numeric_limits<int>::min();
		marker_column_pos = numeric_limits<int>::min();

		token = auxiliary::strtok(&header, header_separator);
		while (token != NULL) {
			column_name = descriptor->get_default_column(token, gwafile->is_case_sensitive());
			if (column_name != NULL) {
				if (strcmp(column_name, Descriptor::MARKER) == 0) {
					marker_column_pos = column_position;
				}
			}
			token = auxiliary::strtok(&header, header_separator);
			++column_position;
		}

		total_columns = column_position;

		if (marker_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_header_with_map()", __LINE__, 7, ((column_name = descriptor->get_column(Descriptor::MARKER)) != NULL) ? column_name : Descriptor::MARKER, gwafile->get_descriptor()->get_name());
		}
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_header_with_map()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_header_with_map()", __LINE__, 6, gwafile->get_descriptor()->get_name());
		throw new_e;
	}
}
Exemplo n.º 19
0
void Annotator::process_map_file_header() throw (AnnotatorException) {
	Descriptor* descriptor = NULL;
	char header_separator = '\0';
	char* header = NULL;
	char* token = NULL;

	int column_position = 0;

	const char* map_marker_column = NULL;
	const char* map_chr_column = NULL;
	const char* map_position_column = NULL;

	if  ((gwafile == NULL) || (map_file == NULL)) {
		return;
	}

	try {
		descriptor = gwafile->get_descriptor();
		header_separator = gwafile->get_regions_file_header_separator();

		if (map_reader.read_line() <= 0) {
			throw AnnotatorException("Annotator", "process_map_file_header()", __LINE__, 5, 1, map_file);
		}

		header = *map_reader.line;

		map_marker_column = descriptor->get_property(Descriptor::MAP_MARKER);
		map_chr_column = descriptor->get_property(Descriptor::MAP_CHR);
		map_position_column = descriptor->get_property(Descriptor::MAP_POSITION);

		map_file_total_columns = numeric_limits<int>::min();
		map_marker_column_pos = numeric_limits<int>::min();
		map_chr_column_pos = numeric_limits<int>::min();
		map_position_column_pos = numeric_limits<int>::min();

		token = auxiliary::strtok(&header, header_separator);
		if (gwafile->is_case_sensitive()) {
			while (token != NULL) {
				if (strcmp(token, map_marker_column) == 0) {
					map_marker_column_pos = column_position;
				} else if (strcmp(token, map_chr_column) == 0) {
					map_chr_column_pos = column_position;
				} else if (strcmp(token, map_position_column) == 0) {
					map_position_column_pos = column_position;
				}
				token = auxiliary::strtok(&header, header_separator);
				++column_position;
			}
		} else {
			while (token != NULL) {
				if (auxiliary::strcmp_ignore_case(token, map_marker_column) == 0) {
					map_marker_column_pos = column_position;
				} else if (auxiliary::strcmp_ignore_case(token, map_chr_column) == 0) {
					map_chr_column_pos = column_position;
				} else if (auxiliary::strcmp_ignore_case(token, map_position_column) == 0) {
					map_position_column_pos = column_position;
				}
				token = auxiliary::strtok(&header, header_separator);
				++column_position;
			}
		}

		map_file_total_columns = column_position;

		if (map_marker_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_map_file_header()", __LINE__, 7, (map_marker_column != NULL) ? map_marker_column : "NULL", map_file);
		}

		if (map_chr_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_map_file_header()", __LINE__, 7, (map_chr_column != NULL) ? map_chr_column : "NULL", map_file);
		}

		if (map_position_column_pos < 0) {
			throw AnnotatorException("Annotator", "process_map_file_header()", __LINE__, 7, (map_position_column != NULL) ? map_position_column : "NULL", map_file);
		}
	} catch (ReaderException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_map_file_header()", __LINE__, 6, map_file);
		throw new_e;
	} catch (DescriptorException &e) {
		AnnotatorException new_e(e);
		new_e.add_message("Annotator", "process_map_file_header()", __LINE__, 6, map_file);
		throw new_e;
	}
}