~Flim() { lambda_.free(); ones_.free(); kappa_.free(); estimates_.free(); q_lambda_.free(); singleton_expectation_.free(); empirical_pair_.free(); empirical_singleton_.free(); }
void initializeKappa(int num_documents) { gsl_vector_float_memcpy(kappa_, empirical_singleton_); gsl_vector_float_scale(kappa_, num_documents); gsl_vector_float_add_constant(kappa_, 1.0); gsl_vector_float_scale(kappa_, 1.0 / (2.0 + num_documents)); for (int ii = 0; ii < kappa_.size(); ++ii) { singleton_expectation_[ii] = kappa_[ii]; kappa_[ii] = logit(kappa_[ii]); } }
// [[Rcpp::export]] Rcpp::List fitData(Rcpp::DataFrame ds) { const size_t ncoeffs = NCOEFFS; const size_t nbreak = NBREAK; const size_t n = N; size_t i, j; Rcpp::DataFrame D(ds); // construct the data.frame object RcppGSL::vector<double> y = D["y"]; // access columns by name, RcppGSL::vector<double> x = D["x"]; // assigning to GSL vectors RcppGSL::vector<double> w = D["w"]; gsl_bspline_workspace *bw; gsl_vector *B; gsl_vector *c; gsl_matrix *X, *cov; gsl_multifit_linear_workspace *mw; double chisq, Rsq, dof, tss; bw = gsl_bspline_alloc(4, nbreak); // allocate a cubic bspline workspace (k = 4) B = gsl_vector_alloc(ncoeffs); X = gsl_matrix_alloc(n, ncoeffs); c = gsl_vector_alloc(ncoeffs); cov = gsl_matrix_alloc(ncoeffs, ncoeffs); mw = gsl_multifit_linear_alloc(n, ncoeffs); gsl_bspline_knots_uniform(0.0, 15.0, bw); // use uniform breakpoints on [0, 15] for (i = 0; i < n; ++i) { // construct the fit matrix X double xi = gsl_vector_get(x, i); gsl_bspline_eval(xi, B, bw); // compute B_j(xi) for all j for (j = 0; j < ncoeffs; ++j) { // fill in row i of X double Bj = gsl_vector_get(B, j); gsl_matrix_set(X, i, j, Bj); } } gsl_multifit_wlinear(X, w, y, c, cov, &chisq, mw); // do the fit dof = n - ncoeffs; tss = gsl_stats_wtss(w->data, 1, y->data, 1, y->size); Rsq = 1.0 - chisq / tss; Rcpp::NumericVector FX(151), FY(151); // output the smoothed curve double xi, yi, yerr; for (xi = 0.0, i=0; xi < 15.0; xi += 0.1, i++) { gsl_bspline_eval(xi, B, bw); gsl_multifit_linear_est(B, c, cov, &yi, &yerr); FX[i] = xi; FY[i] = yi; } Rcpp::List res = Rcpp::List::create(Rcpp::Named("X")=FX, Rcpp::Named("Y")=FY, Rcpp::Named("chisqdof")=Rcpp::wrap(chisq/dof), Rcpp::Named("rsq")=Rcpp::wrap(Rsq)); gsl_bspline_free(bw); gsl_vector_free(B); gsl_matrix_free(X); gsl_vector_free(c); gsl_matrix_free(cov); gsl_multifit_linear_free(mw); y.free(); x.free(); w.free(); return(res); }
RcppExport SEXP R_lrgprApply( SEXP expression_, SEXP data_, SEXP pBigMat_, SEXP env_, SEXP terms_, SEXP EigenVectors_, SEXP EigenValues_, SEXP Wtilde_, SEXP rank_, SEXP chromosome_, SEXP location_, SEXP distance_, SEXP dcmp_features_, SEXP scale_, SEXP delta_, SEXP reEstimateDelta_, SEXP nthreads_, SEXP quiet_, SEXP cincl_){ //BEGIN_RCPP CharacterVector expression( expression_ ); Environment env( env_ ); std::vector<int> terms = as<std::vector<int> >( terms_ ); RcppGSL::matrix<double> eigenVectors = EigenVectors_; RcppGSL::vector<double> eigenValues = EigenValues_; RcppGSL::matrix<double> Wtilde = Wtilde_; int rank = as<int>( rank_ ) ; std::vector<string> chromosome = as<std::vector<string> >(chromosome_); std::vector<double> location = as<std::vector<double> >(location_); double distance = as<double>( distance_ ); std::vector<int> dcmp_features = as<std::vector<int> >( dcmp_features_ ); bool scale = (bool) Rcpp::as<int>( scale_ ); double delta_global = as<double>( delta_ ); bool reEstimateDelta = Rcpp::as<int>( reEstimateDelta_ ); int nthreads = as<int>( nthreads_ ); bool quiet = Rcpp::as<int>( quiet_ ); std::vector<int> cincl = as<std::vector<int> >( cincl_ ); // Make sure all eigen values are non-negative for(unsigned int i=0; i<eigenValues->size; i++){ if( gsl_vector_get( eigenValues, i) < 0 ) gsl_vector_set( eigenValues, i, 0); } featureBatcher fbatch( data_, pBigMat_, 10000, cincl); // Get original number of threads int n_threads_original = omp_get_max_threads(); // Set threads to 1 omp_set_num_threads( nthreads ); // Intel paralellism #ifdef INTEL mkl_set_num_threads( 1 ); #endif // disable nested OpenMP parallelism omp_set_nested(0); // Process exression, X_loop and env Rexpress expr( as<string>( expression ), fbatch.get_N_indivs(), env ); RcppGSL::vector<double> y = expr.get_response(); int n_indivs = y->size; // Check that sizes of and X_loop match if( n_indivs != fbatch.get_N_indivs() ){ y.free(); throw invalid_argument("Dimensions of response and design matrix do not match\n"); } // If # of samples in W and y is the same bool useProxCon = ( Wtilde->size1 == y->size ); if( useProxCon && scale) gsl_matrix_center_scale(Wtilde); // X_clean = model.matrix.default( y ~ sex:One + age ) // Replace marker with 1's so that design matrix for marker j can be created by multiplcation RcppGSL::matrix<double> X_clean = expr.get_model_matrix_clean(); // If trying to access index beyond range // Exit gracefully if( terms[which_max(terms)] >= X_clean->size2 ){ y.free(); X_clean.free(); throw range_error("Element in \"terms\" is out of range"); } gsl_matrix *Xu_clean = gsl_matrix_alloc( eigenVectors->size2, X_clean->size2 ); // Xu_clean = crossprod( decomp$vectors, X_clean) gsl_blas_dgemm( CblasTrans, CblasNoTrans, 1.0, eigenVectors, X_clean, 0.0, Xu_clean ); // get indeces of columns that depend on X[,j] vector<int> loopIndex = expr.get_loop_terms(); long batch_size = MAX( 1, fbatch.getBatchSize()/100.0 ); // Fit null model if necessary if( R_IsNA( delta_global ) && ! reEstimateDelta ){ double log_L, sig_g, sig_e; RcppGSL::matrix<double> X_null = expr.get_model_matrix_null(); LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X_null->size2, useProxCon ? Wtilde->size2 : 0); // Must update W before X if( useProxCon ) lrgpr->update_Wtilde( Wtilde ); lrgpr->update_X( X_null ); // Estimate delta lrgpr->fit_mle( &log_L, &sig_g, &sig_e); delta_global = sig_e / sig_g; X_null.free(); delete lrgpr; } long tests_completed = 0; Progress p(0, false); std::vector<double> pValues( fbatch.get_N_features() ); std::fill(pValues.begin(), pValues.end(), NAN); // Get map for marker in dcmp_features // map_local = map[dcmp_features,] vector<string> chromosome_local = get_values( chromosome, dcmp_features); vector<double> location_local = get_values( location, dcmp_features); time_t start_time; time(&start_time); gsl_matrix *X_set = NULL; bool useThisChunk; for(int i_set=0; i_set<fbatch.get_N_features(); i_set+=fbatch.getBatchSize()){ // Load data from binary matrix (or do noting if NumericMatrix is used) useThisChunk = fbatch.loadNextChunk(); if( ! useThisChunk ) continue; #pragma omp parallel { // Variables local to each thread LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X_clean->size2, useProxCon ? Wtilde->size2 : 0); gsl_matrix *X = gsl_matrix_alloc( X_clean->size1, X_clean->size2 ); gsl_matrix *Xu = gsl_matrix_alloc( eigenVectors->size2, X_clean->size2 ); gsl_vector_view col_view, col_view2; double log_L, sig_g, sig_e; double delta; gsl_vector *marker_j = gsl_vector_alloc( y->size ); gsl_matrix *Wtilde_local = NULL; vector<int> exclude_prev; // Initialize with meaningless data exclude_prev.push_back(-10); #pragma omp for //schedule(static, batch_size) for(int j=0; j<fbatch.getBatchSize(); j++){ // If element j+i_set is not include include list, than skip if( ! fbatch.contains_element(j+i_set) ){ continue; } #pragma omp critical tests_completed++; fbatch.getFeautureInChunk( j, marker_j ); // replace missings values with the mean gsl_vector_set_missing_mean( marker_j ); // Copy clean version of matrices to active variables gsl_matrix_memcpy( X, X_clean ); gsl_matrix_memcpy( Xu, Xu_clean ); for(int k=0; k<loopIndex.size(); k++){ // X_design[,loopIndex[k]] = X_design[,loopIndex[k]] * marker_j col_view = gsl_matrix_column( (gsl_matrix*)(X), loopIndex[k] ); gsl_vector_mul( &col_view.vector, marker_j ); // Xu[,loopIndex[k]] = crossprod( U, X_design[,loopIndex[k]] * marker_j) col_view2 = gsl_matrix_column( (gsl_matrix*)(Xu), loopIndex[k] ); gsl_blas_dgemv( CblasTrans, 1.0, eigenVectors, &col_view.vector, 0.0, &col_view2.vector ); } // Proximal contamination ////////////////////////// // Must update W before X // If a MAP was specified if( chromosome.size() > 1 ){ vector<int> idx = get_markers_in_window( (string) chromosome[j+i_set], location[j+i_set], chromosome_local, location_local, distance); vector<int> exclude = get_values( dcmp_features, idx); /*cout << j+i_set <<": " << endl; cout << "idx: " << endl; print_vector(idx); cout << "exclude: " << endl; print_vector(exclude);*/ // If exclude != exclude_prev, Wtilde_local should be updated if( ! equal(exclude.begin(), exclude.end(), exclude_prev.begin()) ){ if( exclude.size() > 0 ){ // Wtilde_local = X[,exclude] /////////////////////////////// if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local ); Wtilde_local = fbatch.getFeatures( exclude ); gsl_matrix_set_missing_mean_col( Wtilde_local ); if( scale ){ gsl_matrix_center_scale(Wtilde_local); } }else{ if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local ); Wtilde_local = NULL; } // Update W_tilde based on marker window // if argument is NULL, proximal contamination is not used /////////////////////////////////// // If a global Wtilde is given if( useProxCon ){ if( Wtilde_local == NULL ){ lrgpr->update_Wtilde( Wtilde ); } if( Wtilde_local != NULL ){ gsl_matrix *Wcbind = gsl_matrix_alloc( Wtilde->size1, Wtilde_local->size2 + Wtilde->size2); gsl_matrix_cbind( Wtilde, Wtilde_local, Wcbind); lrgpr->update_Wtilde( Wcbind ); gsl_matrix_free(Wcbind); } }else{ // this doesn't have to be dont every time lrgpr->update_Wtilde( Wtilde_local ); } } // save exclude for next marker exclude_prev = exclude; }else{ if( useProxCon ) lrgpr->update_Wtilde( Wtilde ); } lrgpr->update_Xu( X, Xu ); if( reEstimateDelta ){ // Estimate delta lrgpr->fit_mle( &log_L, &sig_g, &sig_e); delta = sig_e / sig_g; }else{ // Use pre-specified delta value lrgpr->fit_fixed_delta( delta_global, &log_L, &sig_g, &sig_e ); delta = delta_global; } #pragma omp critical pValues[j+i_set] = lrgpr->wald_test( terms ); } // END for delete lrgpr; gsl_matrix_free( X ); gsl_matrix_free( Xu ); gsl_vector_free( marker_j ); if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local ); } // End parallel if( ! quiet ) Rcpp::Rcout << print_progress( tests_completed, fbatch.get_N_features_included(), 25, start_time); if( Progress::check_abort() ){ y.free(); X_clean.free(); gsl_matrix_free(Xu_clean); eigenVectors.free(); eigenValues.free(); Wtilde.free(); return wrap(0); } } // End set loop // Restore original number of threads omp_set_num_threads( n_threads_original ); if( ! quiet ) Rcpp::Rcout << endl; y.free(); X_clean.free(); gsl_matrix_free(Xu_clean); eigenVectors.free(); eigenValues.free(); Wtilde.free(); return( wrap(pValues) ); //END_RCPP }
RcppExport SEXP R_lrgpr( SEXP Y_, SEXP X_, SEXP eigenVectors_, SEXP eigenValues_, SEXP delta_, SEXP nthreads_, SEXP Wtilde_, SEXP diagnostic_){ //BEGIN_RCPP RcppGSL::matrix<double> X = X_; RcppGSL::vector<double> y = Y_; RcppGSL::matrix<double> eigenVectors = eigenVectors_; RcppGSL::vector<double> eigenValues = eigenValues_; RcppGSL::matrix<double> Wtilde = Wtilde_; bool diagnostic = (bool) Rcpp::as<int>(diagnostic_); // If # of samples in W and y is the same bool useProxCon = ( Wtilde->size1 == y->size ); double delta = Rcpp::as<double>( delta_ ); double nthreads = Rcpp::as<double>( nthreads_ ); // Get original number of threads int n_threads_original = omp_get_max_threads(); if( ! R_IsNA( nthreads ) ){ omp_set_num_threads( nthreads ); // Intel paralellism #ifdef INTEL mkl_set_num_threads( nthreads ); #endif // disable nested OpenMP parallelism omp_set_nested(0); } // Make sure all eigen values are non-negative for(unsigned int i=0; i<eigenValues->size; i++){ if( gsl_vector_get( eigenValues, i) < 0 ) gsl_vector_set( eigenValues, i, 0); } //cout << "Create lrgpr" << endl; LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X->size2, useProxCon ? Wtilde->size2 : 0); // Must update W before X if( useProxCon ) lrgpr->update_Wtilde( Wtilde ); //cout << "Scale" << endl; gsl_matrix_set_missing_mean_col( X ); //cout << "Update X" << endl; lrgpr->update_X( X ); double log_L, sig_g, sig_e; if( isnan( delta ) ){ // Estimate delta lrgpr->fit_mle( &log_L, &sig_g, &sig_e); delta = sig_e / sig_g; }else{ // Use pre-specified delta value lrgpr->fit_fixed_delta( delta, &log_L, &sig_g, &sig_e ); } double df; RcppGSL::vector<double> Hii(y->size); if( diagnostic && ! useProxCon){ gsl_vector_free(Hii); Hii = lrgpr->get_hat_matrix_diag(); df = gsl_vector_sum_elements( Hii ); }else{ gsl_vector_set_all(Hii, NAN); if( ! useProxCon ) df = lrgpr->get_effective_df(); else df = NAN; } // Restore original number of threads omp_set_num_threads( n_threads_original ); RcppGSL::vector<double> alpha = gsl_vector_alloc( y->size ); RcppGSL::vector<double> Y_hat = lrgpr->get_fitted_response( alpha ); RcppGSL::matrix<double> Sigma = lrgpr->coeff_covariance(); RcppGSL::vector<double> pValues = lrgpr->wald_test_all(); RcppGSL::vector<double> beta = lrgpr->get_beta(); Rcpp::List res = Rcpp::List::create(Rcpp::Named("coefficients") = beta, Rcpp::Named("p.values") = pValues, Rcpp::Named("sigSq_e") = sig_e, Rcpp::Named("sigSq_a") = sig_g, Rcpp::Named("delta") = delta, Rcpp::Named("df") = df, Rcpp::Named("rank") = eigenValues->size, Rcpp::Named("logLik") = log_L, Rcpp::Named("fitted.values")= Y_hat, Rcpp::Named("alpha") = alpha, Rcpp::Named("Sigma") = Sigma, Rcpp::Named("hii") = Hii ); X.free(); y.free(); eigenVectors.free(); eigenValues.free(); beta.free(); Wtilde.free(); delete lrgpr; Hii.free(); alpha.free(); Y_hat.free(); Sigma.free(); pValues.free(); return res; // return the result list to R //END_RCPP }