template <class M>
SEXP top_cumprop_internal(Rcpp::RObject incoming, Rcpp::IntegerVector topset) {
    auto mat = beachmat::create_matrix<M>(incoming);
    const size_t ncells = mat->get_ncol();
    const size_t ngenes = mat->get_nrow();
    check_topset(topset);

    Rcpp::NumericMatrix percentages(topset.size(), ncells);
    typename M::vector holder(ngenes);

    for (size_t c = 0; c < ncells; ++c) {
        // Need to copy the column, as compute_cumsum will change the ordering.
        mat->get_col(c, holder.begin());
        double totals = std::accumulate(holder.begin(), holder.end(), static_cast<typename M::type>(0));

        auto cur_col = percentages.column(c);
        compute_cumsum<typename M::type, typename M::vector>(holder.begin(), ngenes, topset, cur_col.begin());
        for (auto& p : cur_col) {
            p /= totals;
        }
    }

    return percentages;
}
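For readability, here is a minimal sketch of the contract that `compute_cumsum` appears to satisfy, inferred purely from the caller above (the real helper is defined elsewhere in this codebase; the body below is an assumption, not the actual implementation):

#include <Rcpp.h>
#include <algorithm>
#include <functional>

// Hypothetical body for the cumulative-sum helper: sort the working copy in
// decreasing order, then write the running sum at each requested rank in
// 'topset'. The in-place sort is why the caller copies the column first.
template <typename T, class V, class OUT>
void compute_cumsum_sketch(typename V::iterator start, size_t ngenes,
                           const Rcpp::IntegerVector& topset, OUT out) {
    std::sort(start, start + ngenes, std::greater<T>());
    T running = 0;
    size_t counter = 0;
    for (auto t : topset) { // 'topset' assumed sorted and within [1, ngenes], as enforced by check_topset().
        while (counter < static_cast<size_t>(t)) {
            running += *(start + counter);
            ++counter;
        }
        *out = running;
        ++out;
    }
}

Under this reading, dividing each output by the column total (as the caller does) yields the cumulative proportion of counts assigned to the top `t` features for each `t` in `topset`.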
template <class M>
SEXP combined_qc_internal(Rcpp::RObject input, Rcpp::IntegerVector start, Rcpp::IntegerVector end,
        Rcpp::List featcon, Rcpp::List cellcon, Rcpp::IntegerVector topset, typename M::vector detection_limit) {
    auto mat = beachmat::create_matrix<M>(input);
    const size_t ncells = mat->get_ncol();
    const size_t ngenes = mat->get_nrow();

    if (detection_limit.size() != 1) {
        throw std::runtime_error("detection limit should be a scalar");
    }
    typename M::type limit = detection_limit[0];

    // Defining the subset of cells for which to perform the calculation.
    const size_t firstcell = check_integer_scalar(start, "first cell index");
    const size_t lastcell = check_integer_scalar(end, "last cell index");
    if (firstcell > lastcell || lastcell > ncells) {
        throw std::runtime_error("cell indices for parallel execution are out of range");
    }
    const size_t n_usedcells = lastcell - firstcell;

    // Setting up per-cell statistics (for each feature control set).
    const size_t nfcontrols = featcon.size();

    typedef per_cell_statistics<typename M::type, typename M::vector> cell_stats;
    cell_stats all_PCS(n_usedcells, limit, ngenes, topset);

    std::vector<cell_stats> control_PCS(nfcontrols);
    for (size_t fx = 0; fx < nfcontrols; ++fx) {
        Rcpp::IntegerVector current = process_subset_vector(featcon[fx], ngenes, false); // converts to zero-based indices.
        control_PCS[fx] = cell_stats(n_usedcells, limit, current, topset);
    }

    // Setting up per-feature statistics (for each cell control set).
    const size_t nccontrols = cellcon.size();
    std::vector<std::vector<size_t> > chosen_ccs(n_usedcells);

    typedef per_gene_statistics<typename M::type, typename M::vector> gene_stats;
    gene_stats all_PGS(ngenes, limit);

    std::vector<gene_stats> control_PGS(nccontrols);
    for (size_t cx = 0; cx < nccontrols; ++cx) {
        Rcpp::IntegerVector current = process_subset_vector(cellcon[cx], ncells, false); // converts to zero-based indices.
        for (auto curcell : current) {
            size_t cur_index = curcell;
            if (firstcell <= cur_index && cur_index < lastcell) {
                chosen_ccs[cur_index - firstcell].push_back(cx);
            }
        }
        control_PGS[cx] = gene_stats(ngenes, limit);
    }

    // Running through the requested stretch of cells. Supporting sparsity is
    // difficult in this framework, due to the need to consider arbitrary
    // subsets of features, so we limit ourselves to avoiding the copy for
    // dense arrays.
    beachmat::const_column<M> col_holder(mat.get(), false);
    for (size_t c = 0; c < n_usedcells; ++c) {
        col_holder.fill(c + firstcell);
        auto it = col_holder.get_values();

        all_PCS.fill(it);
        for (size_t fx = 0; fx < nfcontrols; ++fx) {
            control_PCS[fx].fill_subset(it);
        }

        all_PGS.compute_summaries(it);
        auto& chosen_cc = chosen_ccs[c];
        for (auto& cx : chosen_cc) {
            control_PGS[cx].compute_summaries(it);
        }
    }

    // Creating a list of all per-cell statistics, and another for all per-feature statistics.
    Rcpp::List output_per_cell(1 + nfcontrols);
    output_per_cell[0] = create_output_per_cell(all_PCS);
    for (size_t fx = 0; fx < nfcontrols; ++fx) {
        output_per_cell[fx + 1] = create_output_per_cell(control_PCS[fx]);
    }

    Rcpp::List output_per_gene(1 + nccontrols);
    output_per_gene[0] = create_output_per_gene(all_PGS);
    for (size_t cx = 0; cx < nccontrols; ++cx) {
        output_per_gene[cx + 1] = create_output_per_gene(control_PGS[cx]);
    }

    return Rcpp::List::create(output_per_cell, output_per_gene);
}
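Templated internals like this are usually reached through a small dispatcher that picks the matrix class from the storage type of the incoming R object. Below is a sketch of such glue, assuming the older beachmat C++ API's `find_sexp_type` together with its `integer_matrix`/`numeric_matrix` classes; the repo's actual entry point (and any error-translation wrappers around it) may differ:

// Hypothetical dispatcher: select the beachmat matrix class from the R
// storage type, then forward everything to the templated worker above.
SEXP combined_qc(SEXP input, SEXP start, SEXP end,
                 SEXP featcon, SEXP cellcon, SEXP topset, SEXP limit) {
    int rtype = beachmat::find_sexp_type(input);
    if (rtype == INTSXP) {
        return combined_qc_internal<beachmat::integer_matrix>(
            input, start, end, featcon, cellcon, topset, limit);
    } else {
        return combined_qc_internal<beachmat::numeric_matrix>(
            input, start, end, featcon, cellcon, topset, limit);
    }
}

The SEXP arguments convert implicitly to the Rcpp types in the worker's signature, so the dispatcher only has to choose the template parameter.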
int main(int argc, char **argv) {
    // Parse the arguments to get the matrix size.
    int n;
    if (argc > 1) {
        n = atoi(argv[1]);
    } else {
        n = 1000; // default value
    }

    int numproc, rank;
    int nrow, uni_chunk_size;
    double time;

    /******************* Initialization *****************/
    // Initialize MPI and query the basic communicator info.
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numproc);

    // Partition the matrix by row and get this rank's partition size.
    nrow = get_nrow(rank, numproc, n);
    uni_chunk_size = (int)ceil((double)n / numproc);

    // Allocate the local block (nrow interior rows plus two halo rows) and fill in the initial values.
    double **local_arr = arr_mem_loc(nrow, n);
    arr_init_val(local_arr, rank, uni_chunk_size, nrow, n);

    MPI_Barrier(MPI_COMM_WORLD);
    // Record the start time.
    if (rank == 0) time = MPI_Wtime();

    /******************* Iterations ****************/
    double *first_row, *last_row, *halo_up, *halo_bt;
    MPI_Request up_request, bt_request;
    MPI_Status up_status, bt_status;

    // Scratch array for the updated interior rows.
    double **new_arr = new double*[nrow];
    for (int i = 0; i < nrow; i++) {
        new_arr[i] = new double[n];
    }

    for (int iter = 0; iter < N_ITER; iter++) {
        first_row = local_arr[1];
        last_row = local_arr[nrow];
        halo_up = local_arr[0];
        halo_bt = local_arr[nrow + 1];

        // Exchange halo rows with the neighbours. Distinct tags keep the
        // upward and downward messages apart, which matters when both
        // neighbours are the same rank (e.g. numproc == 2).
        int up_nb_rank = (rank - 1 + numproc) % numproc;
        int bt_nb_rank = (rank + 1) % numproc;
        MPI_Isend(first_row, n, MPI_DOUBLE, up_nb_rank, 0, MPI_COMM_WORLD, &up_request);
        MPI_Isend(last_row, n, MPI_DOUBLE, bt_nb_rank, 1, MPI_COMM_WORLD, &bt_request);
        MPI_Recv(halo_up, n, MPI_DOUBLE, up_nb_rank, 1, MPI_COMM_WORLD, &up_status);
        MPI_Recv(halo_bt, n, MPI_DOUBLE, bt_nb_rank, 0, MPI_COMM_WORLD, &bt_status);
        // Complete the non-blocking sends before the buffers are modified.
        MPI_Wait(&up_request, MPI_STATUS_IGNORE);
        MPI_Wait(&bt_request, MPI_STATUS_IGNORE);

        // Update the cells.
        for (int i = 0; i < nrow; i++) {
            for (int j = 0; j < n; j++) {
                if (j == 0 || j == n - 1) {
                    // Do not update the boundary columns.
                    new_arr[i][j] = local_arr[i + 1][j];
                } else {
                    // Average of the eight neighbours and the cell itself;
                    // interior row i lives at local_arr[i + 1] because of the halo offset.
                    new_arr[i][j] = (local_arr[i][j - 1]     + local_arr[i][j]     + local_arr[i][j + 1] +
                                     local_arr[i + 1][j - 1] + local_arr[i + 1][j] + local_arr[i + 1][j + 1] +
                                     local_arr[i + 2][j - 1] + local_arr[i + 2][j] + local_arr[i + 2][j + 1]) / 9.0;
                }
            }
        }

        // Copy the updated values back.
        for (int i = 0; i < nrow; i++) {
            for (int j = 0; j < n; j++) {
                local_arr[i + 1][j] = new_arr[i][j];
            }
        }
    }

    // Free the scratch array.
    for (int i = 0; i < nrow; i++) {
        delete[] new_arr[i];
    }
    delete[] new_arr;

    /************************** Calculate the verification sum *****************************/
    // Sum this rank's share of the diagonal elements.
    double local_veri_sum = 0;
    for (int i = 1; i < nrow + 1; i++) {
        int global_i = uni_chunk_size * rank + i - 1;
        if (global_i < n) {
            local_veri_sum += local_arr[i][global_i];
        }
    }

    // Add up all local verification sums.
    double total_veri_sum = 0;
    MPI_Reduce(&local_veri_sum, &total_veri_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    // Compute and print the elapsed time.
    if (rank == 0) {
        time = MPI_Wtime() - time;
        printf("Verification sum = %.6f, Wall time = %.2f\n", total_veri_sum, time);
    }

    arr_mem_clc(local_arr, nrow, n);
    MPI_Finalize();
    return 0;
}
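The update loop indexes rows 0 through nrow + 1, so the allocator must provide one halo row on each side of the interior block. Below is a hypothetical sketch of the helpers this `main` relies on, with the contracts inferred from the call sites (the names match the calls above, but the bodies are assumptions; `arr_init_val` is omitted since its initial values depend on the assignment's problem statement):

// Hypothetical helpers, inferred from the call sites in main() above.

// Rows owned by this rank under a uniform ceil(n/numproc) row partition;
// the last rank may receive fewer rows (or none) when n is not divisible.
static int get_nrow(int rank, int numproc, int n) {
    int chunk = (n + numproc - 1) / numproc;
    int start = rank * chunk;
    int rows = n - start;
    if (rows > chunk) rows = chunk;
    return rows > 0 ? rows : 0;
}

// Allocate nrow interior rows plus two halo rows (indices 0 and nrow + 1),
// each of width n, zero-initialized. Each row is contiguous, which is all
// the row-wise MPI_Isend/MPI_Recv calls above require.
static double** arr_mem_loc(int nrow, int n) {
    double** arr = new double*[nrow + 2];
    for (int i = 0; i < nrow + 2; ++i) {
        arr[i] = new double[n]();
    }
    return arr;
}

// Matching deallocation; the width parameter is unused but kept to mirror
// the call signature in main().
static void arr_mem_clc(double** arr, int nrow, int /*n*/) {
    for (int i = 0; i < nrow + 2; ++i) {
        delete[] arr[i];
    }
    delete[] arr;
}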