~Flim() { lambda_.free(); ones_.free(); kappa_.free(); estimates_.free(); q_lambda_.free(); singleton_expectation_.free(); empirical_pair_.free(); empirical_singleton_.free(); }
double optimizeAll() { double total_delta = 0.0; for (int x = 0; x < lambda_.nrow(); ++x) { for (int y = 0; y < x; ++y) { total_delta += optimizeLambda(y, x); } } return total_delta; }
RcppExport SEXP R_glmApply2( SEXP expression_, SEXP data_, SEXP pBigMat_, SEXP env_, SEXP terms_, SEXP nthreads_, SEXP useMean_, SEXP useIdentityLink_, SEXP univariate_, SEXP multivariate_, SEXP quiet_, SEXP cincl_){ BEGIN_RCPP CharacterVector expression( expression_ ); Environment env( env_ ); std::vector<int> terms = as<std::vector<int> >( terms_ ); int nthreads = as<int>( nthreads_ ); bool useMean = (bool) as<int>( useMean_ ); bool useIdentityLink = as<int>( useIdentityLink_ ); bool univariate = as<int>( univariate_ ); bool multivariate = as<int>( multivariate_ ); bool quiet = as<int>( quiet_ ); std::vector<int> cincl = as<std::vector<int> >( cincl_ ); regressionType regressType = useIdentityLink ? LINEAR : LOGISTIC; featureBatcher fbatch( data_, pBigMat_, 10000, cincl); // Get original number of threads int n_threads_original = omp_get_max_threads(); // Set threads to 1 omp_set_num_threads( nthreads ); // Intel paralellism #ifdef INTEL mkl_set_num_threads( 1 ); #endif // disable nested OpenMP parallelism omp_set_nested(0); // Process exression, X_loop and env Rexpress expr( as<string>( expression ), fbatch.get_N_indivs(), env ); RcppGSL::matrix<double> Y = expr.get_response_m(); int n_indivs = Y->size1; int n_pheno = Y->size2; if( n_pheno > 1 && regressType == LOGISTIC ){ Y.free(); throw invalid_argument("Cannot do multivariate logistic regression\n"); } RcppGSL::matrix<double> Xn = expr.get_model_matrix_null(); if( n_pheno == 1 ) multivariate = false; // Check that sizes of and X_loop match if( n_indivs != fbatch.get_N_indivs() ){ Y.free(); Xn.free(); throw invalid_argument("Dimensions of response and design matrix do not match\n"); } NumericMatrix pValues; NumericMatrix pValues_multivariate; // Define p-values matrix to be returned, only of the corresponding test is performed // Initialize values to NAN if( univariate ){ pValues = NumericMatrix( fbatch.get_N_features(), n_pheno ); std::fill(pValues.begin(), pValues.end(), NAN); } if( multivariate ){ pValues_multivariate = NumericMatrix( fbatch.get_N_features(), 2 ); std::fill(pValues_multivariate.begin(), pValues_multivariate.end(), NAN); } // X_clean = model.matrix.default( y ~ sex:One + age ) // Replace marker with 1's so that design matrix for marker j can be created by multiplcation RcppGSL::matrix<double> X_clean = expr.get_model_matrix_clean(); // If trying to access index beyond range // Exit gracefully if( terms[which_max(terms)] >= X_clean->size2 ){ Y.free(); Xn.free(); X_clean.free(); throw range_error("Element in \"terms\" is out of range"); } // get indeces of columns that depend on SNP vector<int> loopIndex = expr.get_loop_terms(); // Indeces that don't depend on SNP, i,e. are complementary vector<int> loopCompl; for(int i=0; i<X_clean->size2; i++){ if( ! is_in_vector(loopIndex, i) ){ loopCompl.push_back(i); } } // Extract covariate terms gsl_matrix *X_clean_cov = gsl_matrix_alloc(X_clean->size1, loopCompl.size()); gsl_matrix_sub_col(X_clean, loopCompl, X_clean_cov); LM_preproc *preproc = LM_preproc_alloc( Y, X_clean_cov); gsl_matrix_free(X_clean_cov); long batch_size = MAX( 1, fbatch.getBatchSize()/100.0 ); long tests_completed = 0; Progress p(0, false); time_t start_time; time(&start_time); bool useThisChunk; for(int i_set=0; i_set<fbatch.get_N_features(); i_set+=fbatch.getBatchSize()){ // Load data from binary matrix (or do noting if NumericMatrix is used) useThisChunk = fbatch.loadNextChunk(); if( ! useThisChunk ) continue; #pragma omp parallel { Preproc_workspace *proc_work = Preproc_workspace_alloc( Y, loopIndex.size(), preproc); // Variables local to each thread gsl_matrix *X = gsl_matrix_alloc( X_clean->size1, X_clean->size2 ); gsl_vector_view col_view, col_view_y; gsl_vector *marker_j = gsl_vector_alloc( fbatch.get_N_indivs() ); GLM_MV_workspace *workmv; GLM_IRLS_workspace *irls_workspace; gsl_vector *y, *beta; double sig_e, log_L; int rank; if( regressType == LINEAR ){ workmv = GLM_MV_workspace_alloc(Y->size1, X->size2, Y->size2, true, false); }else{ y = gsl_vector_attach_array( Y->data, Y->size1 ); beta = gsl_vector_calloc( X->size2 ); irls_workspace = GLM_IRLS_workspace_alloc( X->size1, X->size2, true); } double Hotelling, Pillai; bool isMissing; #pragma omp for //schedule(static, batch_size) for(int j=0; j<fbatch.getBatchSize(); j++){ // If element j+i_set is not include include list, than skip if( ! fbatch.contains_element(j+i_set) ){ continue; } #pragma omp critical tests_completed++; fbatch.getFeautureInChunk( j, marker_j ); if( useMean ) isMissing = gsl_vector_set_missing_mean( marker_j ); else isMissing = any_is_na( marker_j->data, marker_j->size ); // Check if X has an NA value // If it does, set the p-value to if( isMissing && ! useMean){ if( univariate ){ #pragma omp critical for(int k=0; k<n_pheno; k++){ pValues(j+i_set,k) = NA_REAL; } } #pragma omp critical if( multivariate ){ pValues_multivariate(j+i_set,0) = NA_REAL; pValues_multivariate(j+i_set,1) = NA_REAL; } continue; } // Copy clean version of matrices to active variables gsl_matrix_memcpy( X, X_clean ); for(int k=0; k<loopIndex.size(); k++){ // X_design[,loopIndex[k]] = X_design[,loopIndex[k]] * marker_j col_view = gsl_matrix_column( (gsl_matrix*)(X), loopIndex[k] ); gsl_vector_mul( &col_view.vector, marker_j ); } // Evaluate regression if( regressType == LINEAR ){ // Extract marker terms gsl_matrix *X_markers = gsl_matrix_alloc(X->size1, loopIndex.size()); gsl_matrix_sub_col(X, loopIndex, X_markers); gsl_vector *pvals = GLM_regression_preproc( Y, X_markers, workmv, preproc, proc_work); gsl_matrix_free(X_markers); #pragma omp critical for(int k=0; k<n_pheno; k++){ pValues(j+i_set,k) = gsl_vector_get(pvals, k); } gsl_vector_free( pvals ); /*if( multivariate ){ GLM_HotellingPillai_test( X, workmv, terms, &Hotelling, &Pillai ); #pragma omp critical { pValues_multivariate(j+i_set,0) = Hotelling; pValues_multivariate(j+i_set,1) = Pillai; } }*/ /*if( univariate ){ // Perform Wald test gsl_vector *pvals = GLM_wald_test( workmv, terms ); #pragma omp critical for(int k=0; k<n_pheno; k++){ pValues(j+i_set,k) = gsl_vector_get(pvals, k); } gsl_vector_free( pvals ); }*/ }else{ // LOGISTIC GLM_unpenalized_regression_IRLS(y, X, beta, &sig_e, &log_L, &rank, regressType, false, irls_workspace); #pragma omp critical pValues(j+i_set,0) = GLM_wald_test( beta, irls_workspace->Sigma, X->size1, terms, regressType, irls_workspace->design_is_singular); } } // END for Preproc_workspace_free(proc_work); gsl_matrix_free( X ); gsl_vector_free( marker_j ); if( regressType == LINEAR ){ GLM_MV_workspace_free( workmv ); }else{ GLM_IRLS_workspace_free( irls_workspace ); gsl_vector_free(y); gsl_vector_free(beta); } } // End parallel if( ! quiet ) Rcpp::Rcout << print_progress( tests_completed, fbatch.get_N_features_included(), 25, start_time); if( Progress::check_abort() ){ Y.free(); Xn.free(); X_clean.free(); return wrap(0); } } // End set loop // Restore original number of threads omp_set_num_threads( n_threads_original ); if( ! quiet ) Rcpp::Rcout << endl; Y.free(); Xn.free(); X_clean.free(); LM_preproc_free( preproc ); Rcpp::List res = Rcpp::List::create(Rcpp::Named("pValues") = pValues, Rcpp::Named("pValues_mv") = pValues_multivariate); return( res ); END_RCPP }
RcppExport SEXP R_lrgprApply( SEXP expression_, SEXP data_, SEXP pBigMat_, SEXP env_, SEXP terms_, SEXP EigenVectors_, SEXP EigenValues_, SEXP Wtilde_, SEXP rank_, SEXP chromosome_, SEXP location_, SEXP distance_, SEXP dcmp_features_, SEXP scale_, SEXP delta_, SEXP reEstimateDelta_, SEXP nthreads_, SEXP quiet_, SEXP cincl_){ //BEGIN_RCPP CharacterVector expression( expression_ ); Environment env( env_ ); std::vector<int> terms = as<std::vector<int> >( terms_ ); RcppGSL::matrix<double> eigenVectors = EigenVectors_; RcppGSL::vector<double> eigenValues = EigenValues_; RcppGSL::matrix<double> Wtilde = Wtilde_; int rank = as<int>( rank_ ) ; std::vector<string> chromosome = as<std::vector<string> >(chromosome_); std::vector<double> location = as<std::vector<double> >(location_); double distance = as<double>( distance_ ); std::vector<int> dcmp_features = as<std::vector<int> >( dcmp_features_ ); bool scale = (bool) Rcpp::as<int>( scale_ ); double delta_global = as<double>( delta_ ); bool reEstimateDelta = Rcpp::as<int>( reEstimateDelta_ ); int nthreads = as<int>( nthreads_ ); bool quiet = Rcpp::as<int>( quiet_ ); std::vector<int> cincl = as<std::vector<int> >( cincl_ ); // Make sure all eigen values are non-negative for(unsigned int i=0; i<eigenValues->size; i++){ if( gsl_vector_get( eigenValues, i) < 0 ) gsl_vector_set( eigenValues, i, 0); } featureBatcher fbatch( data_, pBigMat_, 10000, cincl); // Get original number of threads int n_threads_original = omp_get_max_threads(); // Set threads to 1 omp_set_num_threads( nthreads ); // Intel paralellism #ifdef INTEL mkl_set_num_threads( 1 ); #endif // disable nested OpenMP parallelism omp_set_nested(0); // Process exression, X_loop and env Rexpress expr( as<string>( expression ), fbatch.get_N_indivs(), env ); RcppGSL::vector<double> y = expr.get_response(); int n_indivs = y->size; // Check that sizes of and X_loop match if( n_indivs != fbatch.get_N_indivs() ){ y.free(); throw invalid_argument("Dimensions of response and design matrix do not match\n"); } // If # of samples in W and y is the same bool useProxCon = ( Wtilde->size1 == y->size ); if( useProxCon && scale) gsl_matrix_center_scale(Wtilde); // X_clean = model.matrix.default( y ~ sex:One + age ) // Replace marker with 1's so that design matrix for marker j can be created by multiplcation RcppGSL::matrix<double> X_clean = expr.get_model_matrix_clean(); // If trying to access index beyond range // Exit gracefully if( terms[which_max(terms)] >= X_clean->size2 ){ y.free(); X_clean.free(); throw range_error("Element in \"terms\" is out of range"); } gsl_matrix *Xu_clean = gsl_matrix_alloc( eigenVectors->size2, X_clean->size2 ); // Xu_clean = crossprod( decomp$vectors, X_clean) gsl_blas_dgemm( CblasTrans, CblasNoTrans, 1.0, eigenVectors, X_clean, 0.0, Xu_clean ); // get indeces of columns that depend on X[,j] vector<int> loopIndex = expr.get_loop_terms(); long batch_size = MAX( 1, fbatch.getBatchSize()/100.0 ); // Fit null model if necessary if( R_IsNA( delta_global ) && ! reEstimateDelta ){ double log_L, sig_g, sig_e; RcppGSL::matrix<double> X_null = expr.get_model_matrix_null(); LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X_null->size2, useProxCon ? Wtilde->size2 : 0); // Must update W before X if( useProxCon ) lrgpr->update_Wtilde( Wtilde ); lrgpr->update_X( X_null ); // Estimate delta lrgpr->fit_mle( &log_L, &sig_g, &sig_e); delta_global = sig_e / sig_g; X_null.free(); delete lrgpr; } long tests_completed = 0; Progress p(0, false); std::vector<double> pValues( fbatch.get_N_features() ); std::fill(pValues.begin(), pValues.end(), NAN); // Get map for marker in dcmp_features // map_local = map[dcmp_features,] vector<string> chromosome_local = get_values( chromosome, dcmp_features); vector<double> location_local = get_values( location, dcmp_features); time_t start_time; time(&start_time); gsl_matrix *X_set = NULL; bool useThisChunk; for(int i_set=0; i_set<fbatch.get_N_features(); i_set+=fbatch.getBatchSize()){ // Load data from binary matrix (or do noting if NumericMatrix is used) useThisChunk = fbatch.loadNextChunk(); if( ! useThisChunk ) continue; #pragma omp parallel { // Variables local to each thread LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X_clean->size2, useProxCon ? Wtilde->size2 : 0); gsl_matrix *X = gsl_matrix_alloc( X_clean->size1, X_clean->size2 ); gsl_matrix *Xu = gsl_matrix_alloc( eigenVectors->size2, X_clean->size2 ); gsl_vector_view col_view, col_view2; double log_L, sig_g, sig_e; double delta; gsl_vector *marker_j = gsl_vector_alloc( y->size ); gsl_matrix *Wtilde_local = NULL; vector<int> exclude_prev; // Initialize with meaningless data exclude_prev.push_back(-10); #pragma omp for //schedule(static, batch_size) for(int j=0; j<fbatch.getBatchSize(); j++){ // If element j+i_set is not include include list, than skip if( ! fbatch.contains_element(j+i_set) ){ continue; } #pragma omp critical tests_completed++; fbatch.getFeautureInChunk( j, marker_j ); // replace missings values with the mean gsl_vector_set_missing_mean( marker_j ); // Copy clean version of matrices to active variables gsl_matrix_memcpy( X, X_clean ); gsl_matrix_memcpy( Xu, Xu_clean ); for(int k=0; k<loopIndex.size(); k++){ // X_design[,loopIndex[k]] = X_design[,loopIndex[k]] * marker_j col_view = gsl_matrix_column( (gsl_matrix*)(X), loopIndex[k] ); gsl_vector_mul( &col_view.vector, marker_j ); // Xu[,loopIndex[k]] = crossprod( U, X_design[,loopIndex[k]] * marker_j) col_view2 = gsl_matrix_column( (gsl_matrix*)(Xu), loopIndex[k] ); gsl_blas_dgemv( CblasTrans, 1.0, eigenVectors, &col_view.vector, 0.0, &col_view2.vector ); } // Proximal contamination ////////////////////////// // Must update W before X // If a MAP was specified if( chromosome.size() > 1 ){ vector<int> idx = get_markers_in_window( (string) chromosome[j+i_set], location[j+i_set], chromosome_local, location_local, distance); vector<int> exclude = get_values( dcmp_features, idx); /*cout << j+i_set <<": " << endl; cout << "idx: " << endl; print_vector(idx); cout << "exclude: " << endl; print_vector(exclude);*/ // If exclude != exclude_prev, Wtilde_local should be updated if( ! equal(exclude.begin(), exclude.end(), exclude_prev.begin()) ){ if( exclude.size() > 0 ){ // Wtilde_local = X[,exclude] /////////////////////////////// if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local ); Wtilde_local = fbatch.getFeatures( exclude ); gsl_matrix_set_missing_mean_col( Wtilde_local ); if( scale ){ gsl_matrix_center_scale(Wtilde_local); } }else{ if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local ); Wtilde_local = NULL; } // Update W_tilde based on marker window // if argument is NULL, proximal contamination is not used /////////////////////////////////// // If a global Wtilde is given if( useProxCon ){ if( Wtilde_local == NULL ){ lrgpr->update_Wtilde( Wtilde ); } if( Wtilde_local != NULL ){ gsl_matrix *Wcbind = gsl_matrix_alloc( Wtilde->size1, Wtilde_local->size2 + Wtilde->size2); gsl_matrix_cbind( Wtilde, Wtilde_local, Wcbind); lrgpr->update_Wtilde( Wcbind ); gsl_matrix_free(Wcbind); } }else{ // this doesn't have to be dont every time lrgpr->update_Wtilde( Wtilde_local ); } } // save exclude for next marker exclude_prev = exclude; }else{ if( useProxCon ) lrgpr->update_Wtilde( Wtilde ); } lrgpr->update_Xu( X, Xu ); if( reEstimateDelta ){ // Estimate delta lrgpr->fit_mle( &log_L, &sig_g, &sig_e); delta = sig_e / sig_g; }else{ // Use pre-specified delta value lrgpr->fit_fixed_delta( delta_global, &log_L, &sig_g, &sig_e ); delta = delta_global; } #pragma omp critical pValues[j+i_set] = lrgpr->wald_test( terms ); } // END for delete lrgpr; gsl_matrix_free( X ); gsl_matrix_free( Xu ); gsl_vector_free( marker_j ); if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local ); } // End parallel if( ! quiet ) Rcpp::Rcout << print_progress( tests_completed, fbatch.get_N_features_included(), 25, start_time); if( Progress::check_abort() ){ y.free(); X_clean.free(); gsl_matrix_free(Xu_clean); eigenVectors.free(); eigenValues.free(); Wtilde.free(); return wrap(0); } } // End set loop // Restore original number of threads omp_set_num_threads( n_threads_original ); if( ! quiet ) Rcpp::Rcout << endl; y.free(); X_clean.free(); gsl_matrix_free(Xu_clean); eigenVectors.free(); eigenValues.free(); Wtilde.free(); return( wrap(pValues) ); //END_RCPP }
RcppExport SEXP R_lrgpr( SEXP Y_, SEXP X_, SEXP eigenVectors_, SEXP eigenValues_, SEXP delta_, SEXP nthreads_, SEXP Wtilde_, SEXP diagnostic_){ //BEGIN_RCPP RcppGSL::matrix<double> X = X_; RcppGSL::vector<double> y = Y_; RcppGSL::matrix<double> eigenVectors = eigenVectors_; RcppGSL::vector<double> eigenValues = eigenValues_; RcppGSL::matrix<double> Wtilde = Wtilde_; bool diagnostic = (bool) Rcpp::as<int>(diagnostic_); // If # of samples in W and y is the same bool useProxCon = ( Wtilde->size1 == y->size ); double delta = Rcpp::as<double>( delta_ ); double nthreads = Rcpp::as<double>( nthreads_ ); // Get original number of threads int n_threads_original = omp_get_max_threads(); if( ! R_IsNA( nthreads ) ){ omp_set_num_threads( nthreads ); // Intel paralellism #ifdef INTEL mkl_set_num_threads( nthreads ); #endif // disable nested OpenMP parallelism omp_set_nested(0); } // Make sure all eigen values are non-negative for(unsigned int i=0; i<eigenValues->size; i++){ if( gsl_vector_get( eigenValues, i) < 0 ) gsl_vector_set( eigenValues, i, 0); } //cout << "Create lrgpr" << endl; LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X->size2, useProxCon ? Wtilde->size2 : 0); // Must update W before X if( useProxCon ) lrgpr->update_Wtilde( Wtilde ); //cout << "Scale" << endl; gsl_matrix_set_missing_mean_col( X ); //cout << "Update X" << endl; lrgpr->update_X( X ); double log_L, sig_g, sig_e; if( isnan( delta ) ){ // Estimate delta lrgpr->fit_mle( &log_L, &sig_g, &sig_e); delta = sig_e / sig_g; }else{ // Use pre-specified delta value lrgpr->fit_fixed_delta( delta, &log_L, &sig_g, &sig_e ); } double df; RcppGSL::vector<double> Hii(y->size); if( diagnostic && ! useProxCon){ gsl_vector_free(Hii); Hii = lrgpr->get_hat_matrix_diag(); df = gsl_vector_sum_elements( Hii ); }else{ gsl_vector_set_all(Hii, NAN); if( ! useProxCon ) df = lrgpr->get_effective_df(); else df = NAN; } // Restore original number of threads omp_set_num_threads( n_threads_original ); RcppGSL::vector<double> alpha = gsl_vector_alloc( y->size ); RcppGSL::vector<double> Y_hat = lrgpr->get_fitted_response( alpha ); RcppGSL::matrix<double> Sigma = lrgpr->coeff_covariance(); RcppGSL::vector<double> pValues = lrgpr->wald_test_all(); RcppGSL::vector<double> beta = lrgpr->get_beta(); Rcpp::List res = Rcpp::List::create(Rcpp::Named("coefficients") = beta, Rcpp::Named("p.values") = pValues, Rcpp::Named("sigSq_e") = sig_e, Rcpp::Named("sigSq_a") = sig_g, Rcpp::Named("delta") = delta, Rcpp::Named("df") = df, Rcpp::Named("rank") = eigenValues->size, Rcpp::Named("logLik") = log_L, Rcpp::Named("fitted.values")= Y_hat, Rcpp::Named("alpha") = alpha, Rcpp::Named("Sigma") = Sigma, Rcpp::Named("hii") = Hii ); X.free(); y.free(); eigenVectors.free(); eigenValues.free(); beta.free(); Wtilde.free(); delete lrgpr; Hii.free(); alpha.free(); Y_hat.free(); Sigma.free(); pValues.free(); return res; // return the result list to R //END_RCPP }