Ejemplo n.º 1
0
/*
 * Writes the (optionally lambda-corrected) p-value of the linked p-value column.
 *
 * The text in pvalue_column->char_value is parsed with R_strtod. If the text is
 * not consumed completely or the parsed value is NaN, the literal "NA" is written.
 * Otherwise, when lambda is greater than 1.0, the value is passed through
 * Rf_qchisq(., 1, 0, 0), divided by lambda and passed back through
 * Rf_pchisq(., 1, 0, 0) before being written; with lambda <= 1.0 the parsed value
 * is written unchanged.
 *
 * @param output_stream stream that receives the formatted value.
 */
void CorrectedPvalueColumn::out(ofstream& output_stream) throw (ofstream::failure) {
	numeric_value = R_strtod(pvalue_column->char_value, &end_ptr);

	// The whole token must have been consumed and must be a real number.
	const bool parsed_ok = (*end_ptr == '\0') && !(ISNAN(numeric_value));
	if (!parsed_ok) {
		output_stream << "NA";
		return;
	}

	if (lambda > 1.0) {
		// p-value -> statistic, deflate by lambda, statistic -> p-value.
		const double statistic = Rf_qchisq(numeric_value, 1, 0, 0);
		numeric_value = Rf_pchisq(statistic / lambda, 1, 0, 0);
	}

	output_stream << numeric_value;
}
Ejemplo n.º 2
0
double Formatter::calculate_lambda(int& n_total, int& n_filtered) throw (FormatterException) {
	Descriptor* descriptor = NULL;

	char* line = NULL;
	int line_length = 0;
	unsigned int line_number = 2;
	char data_separator = '\0';

	vector<double>* snp_hq = NULL;
	double maf_filter_value = -numeric_limits<double>::infinity();
	double oevar_imp_filter_value = -numeric_limits<double>::infinity();
	bool maf_filter = false;
	bool oevar_imp_filter = false;

	char* token = NULL;
	char* pvalue_token = NULL;
	char* maf_token = NULL;
	char* oevar_imp_token = NULL;
	int column_position = 0;

	char* end_ptr = NULL;
	double d_value = 0.0;

	int n = 0;
	double* data = NULL;
	double* new_data = NULL;
	int current_heap_size = HEAP_SIZE;

	double lambda = numeric_limits<double>::quiet_NaN();

	n_total = 0;
	n_filtered = 0;

	if ((gwafile == NULL) || (pvalue_column_pos < 0)) {
		return lambda;
	}

	try {
		descriptor = gwafile->get_descriptor();
		snp_hq = descriptor->get_threshold(Descriptor::SNP_HQ);
		data_separator = gwafile->get_data_separator();

		if (((maf_filter_value = snp_hq->at(0)) > 0) && (maf_column_pos >= 0)){
			maf_filter = true;
		}

		if (((oevar_imp_filter_value = snp_hq->at(1)) > 0) && (oevar_imp_column_pos >= 0)) {
			oevar_imp_filter = true;
		}

		data = (double*)malloc(HEAP_SIZE * sizeof(double));
		if (data == NULL) {
			throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, HEAP_SIZE * sizeof(double));
		}

		if (maf_filter) {
			if (oevar_imp_filter) {
				// all filters
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

					column_position = 0;
					pvalue_token = NULL;
					maf_token = NULL;
					oevar_imp_token = NULL;
					token = auxiliary::strtok(&line, data_separator);
					while (token != NULL) {
						if (column_position == pvalue_column_pos) {
							auxiliary::trim(&token);
							pvalue_token = token;
						} else if (column_position == maf_column_pos) {
							auxiliary::trim(&token);
							maf_token = token;
						} else if (column_position == oevar_imp_column_pos) {
							auxiliary::trim(&token);
							oevar_imp_token = token;
						}

						token = auxiliary::strtok(&line, data_separator);
						column_position += 1;
					}

					if (pvalue_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					if (maf_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, maf_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					if (oevar_imp_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, oevar_imp_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					d_value = R_strtod(maf_token, &end_ptr);
					if ((*end_ptr != '\0') || (isnan(d_value))) {
						line_number += 1;
						continue;
					}

					d_value = d_value > 0.5 ? 1.0 - d_value : d_value;

					if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
						d_value = R_strtod(oevar_imp_token, &end_ptr);
						if ((*end_ptr != '\0') || (isnan(d_value))) {
							line_number += 1;
							continue;
						}

						if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
							d_value = R_strtod(pvalue_token, &end_ptr);
							if ((*end_ptr != '\0') || (isnan(d_value))) {
								line_number += 1;
								continue;
							}

							n += 1;
							if (n > current_heap_size) {
								current_heap_size += HEAP_INCREMENT;

								new_data = (double*)realloc(data, current_heap_size * sizeof(double));
								if (new_data == NULL) {
									free(data);
									data = NULL;
									throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
								}
								data = new_data;
							}

							data[n - 1] = d_value;
						}
					}

					line_number += 1;
				}
			} else {
				// only maf filter
				while ((line_length = reader.read_line()) > 0) {
					line = *reader.line;

					column_position = 0;
					pvalue_token = NULL;
					maf_token = NULL;
					token = auxiliary::strtok(&line, data_separator);
					while (token != NULL) {
						if (column_position == pvalue_column_pos) {
							auxiliary::trim(&token);
							pvalue_token = token;
						} else if (column_position == maf_column_pos) {
							auxiliary::trim(&token);
							maf_token = token;
						}

						token = auxiliary::strtok(&line, data_separator);
						column_position += 1;
					}

					if (pvalue_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					if (maf_token == NULL) {
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, maf_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
					}

					d_value = R_strtod(maf_token, &end_ptr);
					if ((*end_ptr != '\0') || (isnan(d_value))) {
						line_number += 1;
						continue;
					}

					d_value = d_value > 0.5 ? 1.0 - d_value : d_value;

					if (auxiliary::fcmp(d_value, maf_filter_value, EPSILON) == 1) {
						d_value = R_strtod(pvalue_token, &end_ptr);
						if ((*end_ptr != '\0') || (isnan(d_value))) {
							line_number += 1;
							continue;
						}

						n += 1;
						if (n > current_heap_size) {
							current_heap_size += HEAP_INCREMENT;

							new_data = (double*)realloc(data, current_heap_size * sizeof(double));
							if (new_data == NULL) {
								free(data);
								data = NULL;
								throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
							}
							data = new_data;
						}

						data[n - 1] = d_value;
					}

					line_number += 1;
				}
			}
		} else if (oevar_imp_filter) {
			// only oevar_imp_filter
			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				column_position = 0;
				pvalue_token = NULL;
				oevar_imp_token = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == pvalue_column_pos) {
						auxiliary::trim(&token);
						pvalue_token = token;
					} else if (column_position == oevar_imp_column_pos) {
						auxiliary::trim(&token);
						oevar_imp_token = token;
					}

					token = auxiliary::strtok(&line, data_separator);
					column_position += 1;
				}

				if (pvalue_token == NULL) {
					throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
				}

				if (oevar_imp_token == NULL) {
					throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, oevar_imp_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
				}

				d_value = R_strtod(oevar_imp_token, &end_ptr);
				if ((*end_ptr != '\0') || (isnan(d_value))) {
					line_number += 1;
					continue;
				}

				if (auxiliary::fcmp(d_value, oevar_imp_filter_value, EPSILON) == 1) {
					d_value = R_strtod(pvalue_token, &end_ptr);
					if ((*end_ptr != '\0') || (isnan(d_value))) {
						line_number += 1;
						continue;
					}

					n += 1;
					if (n > current_heap_size) {
						current_heap_size += HEAP_INCREMENT;

						new_data = (double*)realloc(data, current_heap_size * sizeof(double));
						if (new_data == NULL) {
							free(data);
							data = NULL;
							throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
						}
						data = new_data;
					}

					data[n - 1] = d_value;
				}

				line_number += 1;
			}
		} else {
			// no filters
			while ((line_length = reader.read_line()) > 0) {
				line = *reader.line;

				column_position = 0;
				pvalue_token = NULL;
				token = auxiliary::strtok(&line, data_separator);
				while (token != NULL) {
					if (column_position == pvalue_column_pos) {
						auxiliary::trim(&token);
						pvalue_token = token;
						break;
					}

					token = auxiliary::strtok(&line, data_separator);
					column_position += 1;
				}

				if (pvalue_token == NULL) {
					throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 8, pvalue_column->get_header(), line_number, gwafile->get_descriptor()->get_name());
				}

				d_value = R_strtod(pvalue_token, &end_ptr);
				if ((*end_ptr != '\0') || (isnan(d_value))) {
					line_number += 1;
					continue;
				}

				n += 1;
				if (n > current_heap_size) {
					current_heap_size += HEAP_INCREMENT;

					new_data = (double*)realloc(data, current_heap_size * sizeof(double));
					if (new_data == NULL) {
						free(data);
						data = NULL;
						throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 2, current_heap_size * sizeof(double));
					}
					data = new_data;
				}

				data[n - 1] = d_value;

				line_number += 1;
			}
		}

		n_filtered = n;
		n_total = line_number - 2;

		if (line_length == 0) {
			throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 7, line_number, gwafile->get_descriptor()->get_name());
		}

		reader.reset();
		if (reader.read_line() <= 0) {
			throw FormatterException("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 5, line_number, gwafile->get_descriptor()->get_name());
		}
	} catch (DescriptorException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (ReaderException &e) {
		FormatterException new_e(e);
		new_e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name());
		throw new_e;
	} catch (FormatterException &e) {
		e.add_message("Formatter", "double calculate_lambda( int& , int& )", __LINE__, 9, gwafile->get_descriptor()->get_name());
		throw;
	}

	if (n > 0) {
		for (int i = 0; i < n; i++) {
			data[i] = pow(Rf_qnorm5(0.5 * data[i], 0.0, 1.0, 0, 0), 2.0);
		}

		qsort(data, n, sizeof(double), auxiliary::dblcmp);

		lambda =  auxiliary::stats_median_from_sorted_data(data, n) / Rf_qchisq(0.5, 1.0, 0, 0);
	}

	free(data);
	data = NULL;

	return lambda;
}