Ejemplo n.º 1
0
/**
 * Round-trip test for alps::distribute_vector / alps::collect_vector.
 *
 * Rank 0 fills a vector with random values, the vector is scattered so that
 * rank p receives nelms[p] elements starting at offsets[p], every rank prints
 * its share, and the pieces are gathered back on rank 0 and compared with the
 * original data.
 *
 * Exit status: 0 on success, -1 if fewer than two processes are available or
 * (on rank 0) if the gathered data differs from the source.
 */
int main(int argc, char **argv) {
  mpi::environment env(argc, argv);
  mpi::communicator world;

  const int np = world.size();
  const int pid = world.rank();
  const bool is_root = (pid == 0);

  // Leave via `return` instead of std::exit(): std::exit() does not destroy
  // automatic objects, so `env` would never be destructed and (assuming `mpi`
  // aliases boost::mpi) MPI would not be finalized on this path.
  if (np < 2) return -1;

  // Default-seeded generator: every rank runs the same draws below, so all
  // ranks are expected to agree on the counts and offsets.
  boost::mt19937 engine;

  // Each rank is assigned between 1 and 4 elements; offsets are the running
  // sum of the counts, and n ends up as the total number of elements.
  std::vector<int> nelms, offsets;
  int n = 0;
  for (int p = 0; p < np; ++p) {
    nelms.push_back((engine() & 3) + 1);
    offsets.push_back(n);
    n += nelms.back();
  }

  // Only the root holds the full source vector; it stays empty elsewhere.
  std::vector<uint32_t> source;
  if (is_root) {
    for (int i = 0; i < n; ++i) source.push_back(engine());
    std::cout << "source: " << alps::write_vector(source) << std::endl;
  }

  std::vector<uint32_t> distributed;
  alps::distribute_vector(world, nelms, offsets, source, distributed);

  // Print one rank at a time; the barrier after each turn keeps the ranks in
  // step so that the output appears in rank order.
  for (int p = 0; p < np; ++p) {
    if (p == pid)
      std::cout << "process " << p << ": " << alps::write_vector(distributed) << std::endl;
    std::cout << std::flush;
    world.barrier();
  }

  // The receive buffer is sized only on the root.
  std::vector<uint32_t> collected(is_root ? n : 0);
  alps::collect_vector(world, nelms, offsets, distributed, collected);
  if (is_root)
    std::cout << "collected: " << alps::write_vector(collected) << std::endl;

  if (is_root && source != collected) {
    std::cout << "test failed\n";
    // As above: return so that `env` is destroyed before the process ends.
    return -1;
  }
  return 0;
}
Ejemplo n.º 2
0
/* Walks through alignment records grouped by read pair, classifies each pair and
 * collects the valid ones by (anchor chromosome, target chromosome).
 *
 * Arguments:
 *   ffptr             - supplies nchrs() and find_fragment(), used to size the output
 *                       and to assign a fragment index to every retained segment.
 *   check_self_status - called on the first segment of each read; a result of ISPET
 *                       is counted as a dangling end and ISMATE as a self-circle, and
 *                       the pair is dropped in both cases.
 *   icptr             - functor called as (*icptr)(read1, read2) for chimeric pairs
 *                       with at most two segments per read; true marks the pair invalid.
 *   pairlen           - integer vector; number of consecutive records in each pair group.
 *   chrs, pos, flag, mapqual - integer vectors with one entry per record.
 *   cigar             - character vector with one CIGAR string per record.
 *   chimera_strict    - logical scalar; if true, invalid chimeras are discarded.
 *   minqual           - integer scalar; records with a mapping quality below it are
 *                       treated as unmapped. NA disables the filter.
 *   do_dedup          - logical scalar; if true, records flagged as duplicates are removed.
 *
 * Returns a list of length 6:
 *   [0] integer matrix (one row per non-empty chromosome pair, 2 columns) of 1-based
 *       anchor/target chromosome indices;
 *   [1] list of 6-column integer matrices, one per row of [0], holding anchor fragment,
 *       target fragment (both 1-based), anchor position, target position, anchor
 *       length and target length (lengths are negated for reverse-strand segments);
 *   [2] total pairs, pairs marked duplicate, pairs filtered as unmapped, mapped pairs;
 *   [3] dangling ends, self-circles;
 *   [4] number of groups lacking either a first or a second read;
 *   [5] total chimeras, mapped chimeras, multi-segment chimeras, invalid chimeras.
 *
 * Throws std::runtime_error on malformed input or when reads and pair lengths disagree.
 */
SEXP internal_loop (const base_finder * const ffptr, status (*check_self_status)(const segment&, const segment&), const check_invalid_chimera * const icptr,
		SEXP pairlen, SEXP chrs, SEXP pos, SEXP flag, SEXP cigar, SEXP mapqual, SEXP chimera_strict, SEXP minqual, SEXP do_dedup) {

	// Checking input values.
	if (!isInteger(pairlen)) { throw std::runtime_error("length of pairs must be an integer vector"); }
	if (!isInteger(chrs)) { throw std::runtime_error("chromosomes must be an integer vector"); }
	if (!isInteger(pos)) { throw std::runtime_error("positions must be an integer vector"); }
	if (!isInteger(flag)) { throw std::runtime_error("SAM flags must be an integer vector"); }
	if (!isString(cigar)) { throw std::runtime_error("CIGAR strings must be a character vector"); }
	if (!isInteger(mapqual)) { throw std::runtime_error("mapping quality must be an integer vector"); }
	const int nreads=LENGTH(chrs);
	if (LENGTH(pos)!=nreads || LENGTH(flag)!=nreads || LENGTH(cigar)!=nreads || LENGTH(mapqual)!=nreads) {
		throw std::runtime_error("lengths of vectors of read information are not consistent");
	}
	if (!isLogical(chimera_strict) || LENGTH(chimera_strict)!=1) { throw std::runtime_error("chimera removal specification should be a logical scalar"); }
	const int npairs=LENGTH(pairlen);
	if (!isLogical(do_dedup) || LENGTH(do_dedup)!=1) { throw std::runtime_error("duplicate removal specification should be a logical scalar"); }
	if (!isInteger(minqual) || LENGTH(minqual)!=1) { throw std::runtime_error("minimum mapping quality should be an integer scalar"); }

	// Initializing pointers.
	const int* cptr=INTEGER(chrs);
	const int* pptr=INTEGER(pos);
	const int* fptr=INTEGER(flag);
	const int* qptr=INTEGER(mapqual);
	const bool rm_invalid=asLogical(chimera_strict);
	const bool rm_dup=asLogical(do_dedup);
	const int minq=asInteger(minqual);
	// Integer NAs must be compared against NA_INTEGER; ISNA() only recognises the
	// double-precision NA and would never report a missing integer.
	const bool rm_min=(minq!=NA_INTEGER);
	const int * plptr=INTEGER(pairlen);
	const size_t nc=ffptr->nchrs();

	// Constructing output containers. Only the lower triangle (target index <= anchor
	// index) is allocated, as the anchor is chosen to have the larger chromosome index.
	std::deque<std::deque<std::deque<valid_pair> > > collected(nc);
	for (size_t i=0; i<nc; ++i) { collected[i].resize(i+1); }
	std::deque<segment> read1, read2;
	segment current;
	valid_pair curpair;
	int single=0;
	int total=0, dupped=0, filtered=0, mapped=0;
	int dangling=0, selfie=0;
	int total_chim=0, mapped_chim=0, multi_chim=0, inv_chimeras=0;

	// Running through all reads and identifying the interaction they represent.
	int index=0, limit, pindex=0;
	while (index < nreads) {
		read1.clear();
		read2.clear();
		if (pindex==npairs) { throw std::runtime_error("ran out of pairs before running out of reads"); }
		const int& curpl=plptr[pindex];
		++pindex;
		// Comparing against the remaining count, so that a huge pair length cannot
		// overflow 'index+curpl' before the bounds check is made.
		if (curpl > nreads-index) { throw std::runtime_error("ran out of reads before running out of pairs"); }
		limit=index+curpl;

		// Various flags that will be needed.
		bool isdup=false, isunmap=false, ischimera=false,
		     isfirst=false, hasfirst=false, hassecond=false,
		     curdup=false, curunmap=false;

		// Running through and collecting read segments.
		while (index < limit) {
			const int& curflag=fptr[index];
			current.reverse=(curflag & 0x10); // reverse strand bit.
			current.chrid=cptr[index];
			current.pos=pptr[index];
			parse_cigar(CHAR(STRING_ELT(cigar, index)), current.alen, current.offset, current.reverse);

			// Checking how we should proceed; whether we should bother adding it or not.
			// Pair-level duplicate/unmapped status is only taken from zero-offset
			// segments; any segment with a non-zero offset marks the pair as chimeric.
			curdup=(curflag & 0x400);
			curunmap=(curflag & 0x4 || (rm_min && qptr[index] < minq));
			if (current.offset==0) {
				if (curdup) { isdup=true; }
				if (curunmap) { isunmap=true; }
			} else {
				ischimera=true;
			}

			// Checking what it is (first or second read of the pair).
			isfirst = (curflag & 0x40);
			if (isfirst) { hasfirst=true; }
			else { hassecond=true; }

			// Checking which deque to put it in, if we're going to keep it.
			// Zero-offset segments go to the front so that front() can be used
			// as the representative segment of the read later on.
			if (! (curdup && rm_dup) && ! curunmap) {
				std::deque<segment>& current_reads=(isfirst ? read1 : read2);
				if (current.offset==0) {
					current_reads.push_front(current);
				} else {
					current_reads.push_back(current);
				}
			}
			++index;
		}

		// Skipping if it's a singleton; otherwise, reporting it as part of the total read pairs.
		if (! (hasfirst && hassecond)) {
			++single;
			continue;
		}
		++total;

		// Adding to other statistics.
		if (ischimera) { ++total_chim; }
		if (isdup) { ++dupped; }
		if (isunmap) { ++filtered; }

		/* Skipping if unmapped, marked (and we're removing them), and if the first alignment
		 * of either read has any hard 5' clipping. This means that it's not truly 5' terminated
		 * (e.g. the actual 5' end was unmapped, duplicate removed or whatever). Note that
		 * not skipping UNMAP or DUP does not imply non-empty sets, as UNMAP/DUP are only set
		 * for 0-offset alignments; if this isn't in the file, these flags won't get set, but
		 * the sets can still be empty if non-zero-offset alignments are present and filtered
		 * (to escape the singles clause above). Thus, we need to check non-emptiness explicitly.
		 */
		if (isunmap || (rm_dup && isdup) || read1.empty() || read2.empty() || read1.front().offset || read2.front().offset) { continue; }
		++mapped;

		// Assigning fragment IDs, if everything else is good.
		for (size_t i1=0; i1<read1.size(); ++i1) {
			segment& current=read1[i1];
			current.fragid=ffptr->find_fragment(current.chrid, current.pos, current.reverse, current.alen);
		}
		for (size_t i2=0; i2<read2.size(); ++i2) {
			segment& current=read2[i2];
			current.fragid=ffptr->find_fragment(current.chrid, current.pos, current.reverse, current.alen);
		}

		// Determining the type of construct if they have the same ID.
		switch ((*check_self_status)(read1.front(), read2.front())) {
			case ISPET:
				++dangling;
				continue;
			case ISMATE:
				++selfie;
				continue;
			default:
				break;
		}

		// Pulling out chimera diagnostics. 'multi_chim' is incremented provisionally and
		// rolled back when both reads turn out to have a single retained segment.
		if (ischimera) {
			++mapped_chim;
			++multi_chim;
			bool invalid=false;
			if (read1.size()==1 && read2.size()==1) {
				--multi_chim;
			} else if (read1.size() > 2 || read2.size() > 2) {
				invalid=true;
			} else {
				invalid=(*icptr)(read1, read2);
			}
			if (invalid) {
				++inv_chimeras;
				if (rm_invalid) { continue; }
			}
		}

		// Choosing the anchor segment, and reporting it. Read 1 is the anchor if it
		// is larger by chromosome, then by fragment, then by position; ties go to read 2.
		bool anchor=false;
		if (read1.front().chrid > read2.front().chrid) {
			anchor=true;
		} else if (read1.front().chrid==read2.front().chrid) {
			if (read1.front().fragid > read2.front().fragid) {
				anchor=true;
			} else if (read1.front().fragid == read2.front().fragid) {
				if (read1.front().pos > read2.front().pos) {
					anchor=true;
				}
			}
		}
		const segment& anchor_seg=(anchor ? read1.front() : read2.front());
		const segment& target_seg=(anchor ? read2.front() : read1.front());

		// Alignment lengths are stored with a sign that encodes the strand.
		curpair.anchor=anchor_seg.fragid;
		curpair.target=target_seg.fragid;
		curpair.apos=anchor_seg.pos;
		curpair.alen=anchor_seg.alen;
		if (anchor_seg.reverse) { curpair.alen*=-1; }
		curpair.tpos=target_seg.pos;
		curpair.tlen=target_seg.alen;
		if (target_seg.reverse) { curpair.tlen*=-1; }

		if (curpair.alen==0 || curpair.tlen==0) { throw std::runtime_error("alignment lengths of zero should not be present"); }
		collected[anchor_seg.chrid][target_seg.chrid].push_back(curpair);
	}

	// Checking if all pairs were used up.
	if (pindex!=npairs) { throw std::runtime_error("ran out of reads before running out of pairs"); }

	SEXP total_output=PROTECT(allocVector(VECSXP, 6));
	try {
		// Checking how many are not (doubly) empty.
		std::deque<std::pair<int, int> > good;
		for (size_t i=0; i<nc; ++i) {
			for (size_t j=0; j<=i; ++j) {
				const std::deque<valid_pair>& curpairs=collected[i][j];
				if (!curpairs.empty()) { good.push_back(std::make_pair(i, j)); }
			}
		}

		// Column-major matrix: first column holds anchor indices, second holds targets.
		SET_VECTOR_ELT(total_output, 0, allocMatrix(INTSXP, good.size(), 2));
		int* aptr=INTEGER(VECTOR_ELT(total_output, 0));
		int* tptr=aptr+good.size();
		SET_VECTOR_ELT(total_output, 1, allocVector(VECSXP, good.size()));
		SEXP output=VECTOR_ELT(total_output, 1);

		for (size_t i=0; i<good.size(); ++i) {
			aptr[i]=good[i].first+1;
			tptr[i]=good[i].second+1;

			// Filling up those non-empty pairs of chromosomes. Each pointer below
			// addresses one column of the column-major 6-column matrix.
			std::deque<valid_pair>& curpairs=collected[good[i].first][good[i].second];
			SET_VECTOR_ELT(output, i, allocMatrix(INTSXP, curpairs.size(), 6));
			int* axptr=INTEGER(VECTOR_ELT(output, i));
			int* txptr=axptr+curpairs.size();
			int* apxptr=txptr+curpairs.size();
			int* tpxptr=apxptr+curpairs.size();
			int* afxptr=tpxptr+curpairs.size();
			int* tfxptr=afxptr+curpairs.size();
			for (size_t k=0; k<curpairs.size(); ++k) {
				axptr[k]=curpairs[k].anchor+1;
				txptr[k]=curpairs[k].target+1;
				apxptr[k]=curpairs[k].apos;
				tpxptr[k]=curpairs[k].tpos;
				afxptr[k]=curpairs[k].alen;
				tfxptr[k]=curpairs[k].tlen;
			}

			// Emptying out the container once we've processed it, to keep memory usage down.
			std::deque<valid_pair>().swap(curpairs);
		}

		// Dumping mapping diagnostics.
		SET_VECTOR_ELT(total_output, 2, allocVector(INTSXP, 4));
		int* dptr=INTEGER(VECTOR_ELT(total_output, 2));
		dptr[0]=total;
		dptr[1]=dupped;
		dptr[2]=filtered;
		dptr[3]=mapped;

		// Dumping the number of dangling ends, self-circles.
		SET_VECTOR_ELT(total_output, 3, allocVector(INTSXP, 2));
		int * siptr=INTEGER(VECTOR_ELT(total_output, 3));
		siptr[0]=dangling;
		siptr[1]=selfie;

		// Dumping the number designated 'single', as there's no pairs.
		SET_VECTOR_ELT(total_output, 4, ScalarInteger(single));

		// Dumping chimeric diagnostics.
		SET_VECTOR_ELT(total_output, 5, allocVector(INTSXP, 4));
		int* chimptr=INTEGER(VECTOR_ELT(total_output, 5));
		chimptr[0]=total_chim;
		chimptr[1]=mapped_chim;
		chimptr[2]=multi_chim;
		chimptr[3]=inv_chimeras;
	} catch (...) {
		// Rebalancing the protection stack for any exception type before rethrowing.
		UNPROTECT(1);
		throw;
	}
	UNPROTECT(1);
	return total_output;
}