Beispiel #1
0
 ~Flim() {
   lambda_.free();
   ones_.free();
   kappa_.free();
   estimates_.free();
   q_lambda_.free();
   singleton_expectation_.free();
   empirical_pair_.free();
   empirical_singleton_.free();
 }
Beispiel #2
0
 void initializeKappa(int num_documents) {
   gsl_vector_float_memcpy(kappa_, empirical_singleton_);
   gsl_vector_float_scale(kappa_, num_documents);
   gsl_vector_float_add_constant(kappa_, 1.0);
   gsl_vector_float_scale(kappa_, 1.0 / (2.0 + num_documents));
   for (int ii = 0; ii < kappa_.size(); ++ii) {
     singleton_expectation_[ii] = kappa_[ii];
     kappa_[ii] = logit(kappa_[ii]);
   }
 }
Beispiel #3
0
// [[Rcpp::export]]
Rcpp::List fitData(Rcpp::DataFrame ds) {

    const size_t ncoeffs = NCOEFFS;
    const size_t nbreak = NBREAK;

    const size_t n = N;
    size_t i, j;

    Rcpp::DataFrame D(ds);    		// construct the data.frame object
    RcppGSL::vector<double> y = D["y"];	// access columns by name, 
    RcppGSL::vector<double> x = D["x"];	// assigning to GSL vectors
    RcppGSL::vector<double> w = D["w"];

    gsl_bspline_workspace *bw;
    gsl_vector *B;
    gsl_vector *c; 
    gsl_matrix *X, *cov;
    gsl_multifit_linear_workspace *mw;
    double chisq, Rsq, dof, tss;

    bw = gsl_bspline_alloc(4, nbreak);	    // allocate a cubic bspline workspace (k = 4)
    B = gsl_vector_alloc(ncoeffs);

    X = gsl_matrix_alloc(n, ncoeffs);
    c = gsl_vector_alloc(ncoeffs);
    cov = gsl_matrix_alloc(ncoeffs, ncoeffs);
    mw = gsl_multifit_linear_alloc(n, ncoeffs);

    gsl_bspline_knots_uniform(0.0, 15.0, bw);	// use uniform breakpoints on [0, 15] 

    for (i = 0; i < n; ++i) {			// construct the fit matrix X 
        double xi = gsl_vector_get(x, i);

        gsl_bspline_eval(xi, B, bw);		// compute B_j(xi) for all j 

        for (j = 0; j < ncoeffs; ++j) {		// fill in row i of X 
            double Bj = gsl_vector_get(B, j);
            gsl_matrix_set(X, i, j, Bj);
        }
    }

    gsl_multifit_wlinear(X, w, y, c, cov, &chisq, mw);  // do the fit 
    
    dof = n - ncoeffs;
    tss = gsl_stats_wtss(w->data, 1, y->data, 1, y->size);
    Rsq = 1.0 - chisq / tss;
    
    Rcpp::NumericVector FX(151), FY(151);	// output the smoothed curve 
    double xi, yi, yerr;
    for (xi = 0.0, i=0; xi < 15.0; xi += 0.1, i++) {
        gsl_bspline_eval(xi, B, bw);
        gsl_multifit_linear_est(B, c, cov, &yi, &yerr);
        FX[i] = xi;
        FY[i] = yi;
    }

    Rcpp::List res =
      Rcpp::List::create(Rcpp::Named("X")=FX,
                         Rcpp::Named("Y")=FY,
			 Rcpp::Named("chisqdof")=Rcpp::wrap(chisq/dof),
			 Rcpp::Named("rsq")=Rcpp::wrap(Rsq));

    gsl_bspline_free(bw);
    gsl_vector_free(B);
    gsl_matrix_free(X);
    gsl_vector_free(c);
    gsl_matrix_free(cov);
    gsl_multifit_linear_free(mw);
    
    y.free();
    x.free();
    w.free();

    return(res);   
}
Beispiel #4
0
RcppExport SEXP R_lrgprApply( SEXP expression_, SEXP data_, SEXP pBigMat_, SEXP env_, SEXP terms_, SEXP EigenVectors_, SEXP EigenValues_, SEXP Wtilde_, SEXP rank_,  SEXP chromosome_, SEXP location_, SEXP distance_, SEXP dcmp_features_, SEXP scale_, SEXP delta_, SEXP reEstimateDelta_, SEXP nthreads_, SEXP quiet_, SEXP cincl_){

//BEGIN_RCPP
		
	CharacterVector expression( expression_ );
	Environment env( env_ );
	std::vector<int> terms = as<std::vector<int> >( terms_ ); 
	RcppGSL::matrix<double> eigenVectors = EigenVectors_;
	RcppGSL::vector<double> eigenValues = EigenValues_; 
	RcppGSL::matrix<double> Wtilde = Wtilde_; 
	int rank = as<int>( rank_ )
;	std::vector<string> chromosome = as<std::vector<string> >(chromosome_);
	std::vector<double> location = as<std::vector<double> >(location_);
	double distance = as<double>( distance_ );
	std::vector<int> dcmp_features = as<std::vector<int> >( dcmp_features_ );
	bool scale = (bool) Rcpp::as<int>( scale_ );
	double delta_global = as<double>( delta_ );
	bool reEstimateDelta = Rcpp::as<int>( reEstimateDelta_ );
	int nthreads = as<int>( nthreads_ );
	bool quiet = Rcpp::as<int>( quiet_ );
	std::vector<int> cincl = as<std::vector<int> >( cincl_ );


	// Make sure all eigen values are non-negative
	for(unsigned int i=0; i<eigenValues->size; i++){
		if( gsl_vector_get( eigenValues, i) < 0 )  gsl_vector_set( eigenValues, i, 0);
	}
	
	featureBatcher fbatch( data_, pBigMat_, 10000, cincl);

	// Get original number of threads
	int n_threads_original = omp_get_max_threads();

	// Set threads to 1
	omp_set_num_threads( nthreads );
	// Intel paralellism
	#ifdef INTEL
	mkl_set_num_threads( 1 );
	#endif
	// disable nested OpenMP parallelism
	omp_set_nested(0);

	// Process exression, X_loop and env
	Rexpress expr( as<string>( expression ), fbatch.get_N_indivs(), env );			

	RcppGSL::vector<double> y = expr.get_response(); 

	int n_indivs = y->size;

	// Check that sizes of and X_loop match
	if( n_indivs != fbatch.get_N_indivs() ){
		y.free();
	
		throw invalid_argument("Dimensions of response and design matrix do not match\n");
	}

	// If # of samples in W and y is the same
	bool useProxCon = ( Wtilde->size1 == y->size );

	if( useProxCon && scale) gsl_matrix_center_scale(Wtilde);
							
	// X_clean = model.matrix.default( y ~ sex:One + age )
	// Replace marker with 1's so that design matrix for marker j can be created by multiplcation
	RcppGSL::matrix<double> X_clean = expr.get_model_matrix_clean(); 

	// If trying to access index beyond range
	// Exit gracefully
	if( terms[which_max(terms)] >= X_clean->size2 ){
		y.free();
		X_clean.free();
	
		throw range_error("Element in \"terms\" is out of range");
	}

	gsl_matrix *Xu_clean = gsl_matrix_alloc( eigenVectors->size2, X_clean->size2 );

	// Xu_clean = crossprod( decomp$vectors, X_clean)
	gsl_blas_dgemm( CblasTrans, CblasNoTrans, 1.0, eigenVectors, X_clean, 0.0, Xu_clean );

	// get indeces of columns that depend on X[,j]
	vector<int> loopIndex = expr.get_loop_terms();

	long batch_size = MAX( 1, fbatch.getBatchSize()/100.0 ); 

	// Fit null model if necessary
	if( R_IsNA( delta_global ) && ! reEstimateDelta ){
			
		double log_L, sig_g, sig_e;

		RcppGSL::matrix<double> X_null = expr.get_model_matrix_null(); 	

		LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X_null->size2, useProxCon ? Wtilde->size2 : 0);

		// Must update W before X
		if( useProxCon ) lrgpr->update_Wtilde( Wtilde );
		lrgpr->update_X( X_null );

		// Estimate delta
		lrgpr->fit_mle( &log_L, &sig_g, &sig_e);
		delta_global = sig_e / sig_g;

		X_null.free(); 
		delete lrgpr;
	}

	long tests_completed = 0;
	Progress p(0, false);

	std::vector<double> pValues( fbatch.get_N_features() );
	std::fill(pValues.begin(), pValues.end(), NAN);

	// Get map for marker in dcmp_features
	// map_local = map[dcmp_features,]
	vector<string> chromosome_local = get_values( chromosome, dcmp_features);
	vector<double> location_local = get_values( location, dcmp_features); 

	time_t start_time;
	time(&start_time);

	gsl_matrix *X_set = NULL;
	bool useThisChunk;

	for(int i_set=0; i_set<fbatch.get_N_features(); i_set+=fbatch.getBatchSize()){

		// Load data from binary matrix (or do noting if NumericMatrix is used)
		useThisChunk = fbatch.loadNextChunk();

		if( ! useThisChunk ) continue;

		#pragma omp parallel
		{
			// Variables local to each thread		
			LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X_clean->size2, useProxCon ? Wtilde->size2 : 0);

			gsl_matrix *X = gsl_matrix_alloc( X_clean->size1, X_clean->size2 );
			gsl_matrix *Xu = gsl_matrix_alloc( eigenVectors->size2, X_clean->size2 );

			gsl_vector_view col_view, col_view2;

			double log_L, sig_g, sig_e;
			double delta;

			gsl_vector *marker_j = gsl_vector_alloc( y->size ); 

			gsl_matrix *Wtilde_local = NULL;
			vector<int> exclude_prev; 

			// Initialize with meaningless data
			exclude_prev.push_back(-10);

			#pragma omp for //schedule(static, batch_size)		
			for(int j=0; j<fbatch.getBatchSize(); j++){		

				// If element j+i_set is not include include list, than skip
				if( ! fbatch.contains_element(j+i_set) ){
					continue;
				}

				#pragma omp critical
				tests_completed++;

				fbatch.getFeautureInChunk( j, marker_j );	

				// replace missings values with the mean
				gsl_vector_set_missing_mean( marker_j );					
				
				// Copy clean version of matrices to active variables
				gsl_matrix_memcpy( X, X_clean );
				gsl_matrix_memcpy( Xu, Xu_clean );	

				for(int k=0; k<loopIndex.size(); k++){			
					// X_design[,loopIndex[k]] = X_design[,loopIndex[k]] * marker_j
					col_view = gsl_matrix_column( (gsl_matrix*)(X), loopIndex[k] );
					gsl_vector_mul( &col_view.vector, marker_j );

					// Xu[,loopIndex[k]] = crossprod( U, X_design[,loopIndex[k]] * marker_j)
					col_view2 = gsl_matrix_column( (gsl_matrix*)(Xu), loopIndex[k] );
					gsl_blas_dgemv( CblasTrans, 1.0, eigenVectors, &col_view.vector, 0.0, &col_view2.vector );
				}

				// Proximal contamination
				//////////////////////////

				// Must update W before X

				// If a MAP was specified
				if( chromosome.size() > 1 ){

					vector<int> idx = get_markers_in_window( (string) chromosome[j+i_set], location[j+i_set], chromosome_local, location_local, distance);

					vector<int> exclude = get_values( dcmp_features, idx);

					/*cout << j+i_set <<": " << endl;
					
					cout << "idx: " << endl;
					print_vector(idx);

					cout << "exclude: " << endl;
					print_vector(exclude);*/

					// If exclude != exclude_prev, Wtilde_local should be updated
					if( ! equal(exclude.begin(), exclude.end(), exclude_prev.begin()) ){

						if( exclude.size() > 0 ){

							// Wtilde_local = X[,exclude]
							///////////////////////////////

							if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local );
	
							Wtilde_local = fbatch.getFeatures( exclude );

							gsl_matrix_set_missing_mean_col( Wtilde_local );

							if( scale ){
								gsl_matrix_center_scale(Wtilde_local);
							}

						}else{
							if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local );
							Wtilde_local = NULL;
						}

						// Update W_tilde based on marker window
						// if argument is NULL, proximal contamination is not used
						///////////////////////////////////

						// If a global Wtilde is given
						if( useProxCon ){
							if( Wtilde_local == NULL ){
								lrgpr->update_Wtilde( Wtilde );
							}
							if( Wtilde_local != NULL ){

								gsl_matrix *Wcbind = gsl_matrix_alloc( Wtilde->size1, Wtilde_local->size2 + Wtilde->size2);
								gsl_matrix_cbind( Wtilde, Wtilde_local, Wcbind);

								lrgpr->update_Wtilde( Wcbind );

								gsl_matrix_free(Wcbind);
							}
						}else{				
							// this doesn't have to be dont every time		
							lrgpr->update_Wtilde( Wtilde_local );
						}
					}

					// save exclude for next marker
					exclude_prev = exclude;
				}else{
					if( useProxCon ) lrgpr->update_Wtilde( Wtilde );
				}

				lrgpr->update_Xu( X, Xu );						

				if( reEstimateDelta ){
					// Estimate delta
					lrgpr->fit_mle( &log_L, &sig_g, &sig_e);
				
					delta = sig_e / sig_g;
				}else{
					// Use pre-specified delta value
					lrgpr->fit_fixed_delta( delta_global, &log_L, &sig_g, &sig_e );
					delta = delta_global;
				}

				#pragma omp critical
				pValues[j+i_set] = lrgpr->wald_test( terms );	

			} // END for
						
			delete lrgpr;			
			gsl_matrix_free( X );
			gsl_matrix_free( Xu );
			gsl_vector_free( marker_j );
			if( Wtilde_local != NULL ) gsl_matrix_free( Wtilde_local );

		} // End parallel

		if( ! quiet )  Rcpp::Rcout << print_progress( tests_completed, fbatch.get_N_features_included(), 25, start_time);

		if( Progress::check_abort() ){

			y.free();
			X_clean.free();
			gsl_matrix_free(Xu_clean);

			eigenVectors.free();
			eigenValues.free();
			Wtilde.free();
			return wrap(0);
		}

	} // End set loop

	// Restore original number of threads
	omp_set_num_threads( n_threads_original );

	if( ! quiet ) Rcpp::Rcout << endl;

	y.free();
	X_clean.free();
	gsl_matrix_free(Xu_clean);

	eigenVectors.free();
	eigenValues.free();
	Wtilde.free();

	return( wrap(pValues) );

//END_RCPP
}
Beispiel #5
0
RcppExport SEXP R_lrgpr( SEXP Y_, SEXP X_, SEXP eigenVectors_, SEXP eigenValues_, SEXP delta_, SEXP nthreads_, SEXP Wtilde_, SEXP diagnostic_){

//BEGIN_RCPP
	
	RcppGSL::matrix<double> X = X_; 
	RcppGSL::vector<double> y = Y_; 	
	RcppGSL::matrix<double> eigenVectors = eigenVectors_; 	
	RcppGSL::vector<double> eigenValues = eigenValues_; 
	RcppGSL::matrix<double> Wtilde = Wtilde_; 
	bool diagnostic = (bool) Rcpp::as<int>(diagnostic_);

	// If # of samples in W and y is the same
	bool useProxCon = ( Wtilde->size1 == y->size );
	
	double delta = Rcpp::as<double>( delta_ );
	double nthreads = Rcpp::as<double>( nthreads_ );

	// Get original number of threads
	int n_threads_original = omp_get_max_threads();

	if( ! R_IsNA( nthreads ) ){

		omp_set_num_threads( nthreads ); 
		// Intel paralellism
		#ifdef INTEL
		mkl_set_num_threads( nthreads );	
		#endif
		// disable nested OpenMP parallelism
		omp_set_nested(0);
	}
	
	// Make sure all eigen values are non-negative
	for(unsigned int i=0; i<eigenValues->size; i++){
		if( gsl_vector_get( eigenValues, i) < 0 )  gsl_vector_set( eigenValues, i, 0);
	}

	//cout << "Create lrgpr" << endl;
	LRGPR *lrgpr = new LRGPR( y, eigenVectors, eigenValues, X->size2, useProxCon ? Wtilde->size2 : 0);

	// Must update W before X
	if( useProxCon ) lrgpr->update_Wtilde( Wtilde );

	//cout << "Scale" << endl;
	gsl_matrix_set_missing_mean_col( X );

	//cout << "Update X" << endl;	
	lrgpr->update_X( X );

	double log_L, sig_g, sig_e;

	if( isnan( delta ) ){
		// Estimate delta
		lrgpr->fit_mle( &log_L, &sig_g, &sig_e);

		delta = sig_e / sig_g;
	}else{
		// Use pre-specified delta value
		lrgpr->fit_fixed_delta( delta, &log_L, &sig_g, &sig_e );
	}

	double df;
	RcppGSL::vector<double> Hii(y->size);

	if( diagnostic && ! useProxCon){
		gsl_vector_free(Hii);
		Hii = lrgpr->get_hat_matrix_diag();	
		df = gsl_vector_sum_elements( Hii );

	}else{
		gsl_vector_set_all(Hii, NAN);
		
		if( ! useProxCon )	df = lrgpr->get_effective_df();
		else df = NAN;
	}

	// Restore original number of threads
	omp_set_num_threads( n_threads_original );

	RcppGSL::vector<double> alpha = gsl_vector_alloc( y->size );
	RcppGSL::vector<double> Y_hat = lrgpr->get_fitted_response( alpha );
	RcppGSL::matrix<double> Sigma = lrgpr->coeff_covariance();
	RcppGSL::vector<double> pValues = lrgpr->wald_test_all();
	RcppGSL::vector<double> beta = lrgpr->get_beta();	

	Rcpp::List res = Rcpp::List::create(Rcpp::Named("coefficients") = beta,
										Rcpp::Named("p.values") 	= pValues,
										Rcpp::Named("sigSq_e") 		= sig_e,
										Rcpp::Named("sigSq_a") 		= sig_g,
										Rcpp::Named("delta") 		= delta, 
										Rcpp::Named("df") 			= df,
										Rcpp::Named("rank") 		= eigenValues->size,
										Rcpp::Named("logLik")		= log_L,
										Rcpp::Named("fitted.values")= Y_hat,
										Rcpp::Named("alpha")		= alpha,
										Rcpp::Named("Sigma")		= Sigma,		
										Rcpp::Named("hii")			= Hii  );
	
	X.free(); 
	y.free();
	eigenVectors.free();
	eigenValues.free();
	beta.free();
	Wtilde.free();

	delete lrgpr;

	Hii.free();
	alpha.free();
	Y_hat.free();
	Sigma.free();
	pValues.free();

	return res; // return the result list to R

//END_RCPP
}