Exemple #1
0
/*
 * Assemble the FASTA index buffers into a 5-column list named
 * "recno", "fileno", "offset", "desc", "seqlength" (in that order), then
 * coerce that list in place with list_as_data_frame(), using the number of
 * elements in 'recno_buf' as the row count.
 *
 * The long long offsets in 'offset_buf' are converted to double and stored
 * in a numeric vector.
 */
static SEXP make_fasta_index_data_frame(const IntAE *recno_buf,
					const IntAE *fileno_buf,
					const LLongAE *offset_buf,
					const CharAEAE *desc_buf,
					const IntAE *seqlength_buf)
{
	static const char *const col_names[] = {
		"recno", "fileno", "offset", "desc", "seqlength"
	};
	SEXP ans, ans_names, col;
	double *offsets;
	int ncol, noffset, j;

	ncol = 5;
	PROTECT(ans = NEW_LIST(ncol));

	/* Column names. */
	PROTECT(ans_names = NEW_CHARACTER(ncol));
	for (j = 0; j < ncol; j++) {
		PROTECT(col = mkChar(col_names[j]));
		SET_STRING_ELT(ans_names, j, col);
		UNPROTECT(1);
	}
	SET_NAMES(ans, ans_names);
	UNPROTECT(1);

	/* "recno" column. */
	PROTECT(col = new_INTEGER_from_IntAE(recno_buf));
	SET_ELEMENT(ans, 0, col);
	UNPROTECT(1);

	/* "fileno" column. */
	PROTECT(col = new_INTEGER_from_IntAE(fileno_buf));
	SET_ELEMENT(ans, 1, col);
	UNPROTECT(1);

	/* "offset" column: each long long offset is cast to double. */
	PROTECT(col = NEW_NUMERIC(LLongAE_get_nelt(offset_buf)));
	noffset = LENGTH(col);
	offsets = REAL(col);
	for (j = 0; j < noffset; j++)
		offsets[j] = (double) offset_buf->elts[j];
	SET_ELEMENT(ans, 2, col);
	UNPROTECT(1);

	/* "desc" column. */
	PROTECT(col = new_CHARACTER_from_CharAEAE(desc_buf));
	SET_ELEMENT(ans, 3, col);
	UNPROTECT(1);

	/* "seqlength" column. */
	PROTECT(col = new_INTEGER_from_IntAE(seqlength_buf));
	SET_ELEMENT(ans, 4, col);
	UNPROTECT(1);

	/* list_as_data_frame() modifies 'ans' in place. */
	list_as_data_frame(ans, IntAE_get_nelt(recno_buf));
	UNPROTECT(1);
	return ans;
}
Exemple #2
0
/* --- .Call ENTRY POINT ---
 * Build an index of the FASTA files in 'filexp_list' and return it as the
 * object produced by make_fasta_index_data_frame().
 *
 * Arguments:
 *   filexp_list:    list whose elements are each passed to
 *                   parse_FASTA_file(); its names are used to identify the
 *                   file in error and warning messages.
 *   nrec, skip:     integer vectors; only the first element is read and
 *                   forwarded to parse_FASTA_file().
 *   seek_first_rec: logical vector; only the first element is read and
 *                   forwarded to parse_FASTA_file().
 *   lkup:           forwarded to new_FASTAINDEX_loader().
 *
 * Raises an R error as soon as parse_FASTA_file() returns a non-NULL
 * message for a file, and an R warning for each file where it reports a
 * non-zero number of invalid one-letter sequence codes.
 */
SEXP fasta_index(SEXP filexp_list,
		 SEXP nrec, SEXP skip, SEXP seek_first_rec, SEXP lkup)
{
	int nrec0, skip0, seek_rec0, i, recno, old_nrec, new_nrec, k;
	FASTAINDEX_loaderExt loader_ext;
	FASTAloader loader;
	IntAE *seqlength_buf, *fileno_buf;
	SEXP filexp;
	long long int offset, ninvalid;
	const char *errmsg;

	nrec0 = INTEGER(nrec)[0];
	skip0 = INTEGER(skip)[0];
	seek_rec0 = LOGICAL(seek_first_rec)[0];
	loader_ext = new_FASTAINDEX_loaderExt();
	loader = new_FASTAINDEX_loader(lkup, 1, &loader_ext);
	seqlength_buf = loader_ext.seqlength_buf;
	fileno_buf = new_IntAE(0, 0, 0);
	/* 'recno' is initialized once and carried across files; 'offset' and
	   'ninvalid' are reset for each file. */
	for (i = recno = 0; i < LENGTH(filexp_list); i++) {
		filexp = VECTOR_ELT(filexp_list, i);
		offset = ninvalid = 0LL;
		errmsg = parse_FASTA_file(filexp, nrec0, skip0, seek_rec0,
					  &loader, &recno, &offset, &ninvalid);
		/* Report the message returned by parse_FASTA_file() itself
		   rather than going through a separate buffer. */
		if (errmsg != NULL)
			error("reading FASTA file %s: %s",
			      CHAR(STRING_ELT(GET_NAMES(filexp_list), i)),
			      errmsg);
		if (ninvalid != 0LL)
			warning("reading FASTA file %s: ignored %lld "
				"invalid one-letter sequence codes",
				CHAR(STRING_ELT(GET_NAMES(filexp_list), i)),
				ninvalid);
		/* Tag every record added to 'seqlength_buf' since the
		   previous file with the current file number (i + 1). */
		old_nrec = IntAE_get_nelt(fileno_buf);
		new_nrec = IntAE_get_nelt(seqlength_buf);
		for (k = old_nrec; k < new_nrec; k++)
			IntAE_insert_at(fileno_buf, k, i + 1);
	}
	return make_fasta_index_data_frame(loader_ext.recno_buf,
					   fileno_buf,
					   loader_ext.offset_buf,
					   loader_ext.desc_buf,
					   seqlength_buf);
}
Exemple #3
0
/*
 * Append a new entry with value 0 at the end of the 'seqlength_buf' buffer
 * found in the loader's extension.
 */
static void FASTAINDEX_load_empty_seq(FASTAloader *loader)
{
	FASTAINDEX_loaderExt *ext = loader->ext;
	IntAE *lengths = ext->seqlength_buf;

	IntAE_insert_at(lengths, IntAE_get_nelt(lengths), 0);
}
Exemple #4
0
/*
 * Record one match for 'PSpair_id' in 'match_buf'.
 *
 * The match count of 'PSpair_id' is incremented; if it was 0 before the
 * increment, 'PSpair_id' is also appended to 'PSlink_ids'. When the
 * buffer has 'match_starts' and/or 'match_widths', 'start' and/or 'width'
 * are appended to the corresponding per-id buffer.
 */
void _MatchBuf_report_match(MatchBuf *match_buf,
		int PSpair_id, int start, int width)
{
	IntAE *ids = match_buf->PSlink_ids;
	IntAE *counts = match_buf->match_counts;
	IntAE *dest;
	int is_first_match;

	is_first_match = (counts->elts[PSpair_id]++ == 0);
	if (is_first_match) {
		IntAE_insert_at(ids, IntAE_get_nelt(ids), PSpair_id);
	}
	if (match_buf->match_starts != NULL) {
		dest = match_buf->match_starts->elts[PSpair_id];
		IntAE_insert_at(dest, IntAE_get_nelt(dest), start);
	}
	if (match_buf->match_widths != NULL) {
		dest = match_buf->match_widths->elts[PSpair_id];
		IntAE_insert_at(dest, IntAE_get_nelt(dest), width);
	}
}
Exemple #5
0
/*
 * Add the length of 'seq_data' to the last entry of the 'seqlength_buf'
 * buffer found in the loader's extension.
 *
 * NOTE(review): assumes the buffer already holds at least one entry when
 * this is called -- confirm against the caller.
 */
static void FASTAINDEX_load_seq_data(FASTAloader *loader,
		const Chars_holder *seq_data)
{
	FASTAINDEX_loaderExt *ext = loader->ext;
	IntAE *lengths = ext->seqlength_buf;
	int *last = &lengths->elts[IntAE_get_nelt(lengths) - 1];

	*last += seq_data->length;
}
Exemple #6
0
/*
 * Reset 'match_buf'.
 *
 * For every id currently listed in 'PSlink_ids', the match count is set
 * back to 0 and, when present, the per-id 'match_starts' and
 * 'match_widths' buffers are set to 0 elements. 'PSlink_ids' itself is
 * then set to 0 elements.
 */
void _MatchBuf_flush(MatchBuf *match_buf)
{
	int pending = IntAE_get_nelt(match_buf->PSlink_ids);

	for (int k = 0; k < pending; k++) {
		int id = match_buf->PSlink_ids->elts[k];

		match_buf->match_counts->elts[id] = 0;
		if (match_buf->match_starts != NULL) {
			IntAE_set_nelt(match_buf->match_starts->elts[id], 0);
		}
		if (match_buf->match_widths != NULL) {
			IntAE_set_nelt(match_buf->match_widths->elts[id], 0);
		}
	}
	IntAE_set_nelt(match_buf->PSlink_ids, 0);
}
Exemple #7
0
/*
 * Append one record to the index buffers found in the loader's extension:
 * 'recno + 1' goes to 'recno_buf', 'offset' to 'offset_buf', and the
 * description line to 'desc_buf'.
 */
static void FASTAINDEX_load_desc_line(FASTAloader *loader,
				      int recno, long long int offset,
				      const Chars_holder *desc_line)
{
	FASTAINDEX_loaderExt *ext = loader->ext;
	IntAE *recnos = ext->recno_buf;
	LLongAE *offsets = ext->offset_buf;
	CharAEAE *descs = ext->desc_buf;

	IntAE_insert_at(recnos, IntAE_get_nelt(recnos), recno + 1);
	LLongAE_insert_at(offsets, LLongAE_get_nelt(offsets), offset);
	/* Only the pointer is passed (no length), so this relies on
	   desc_line->ptr being nul-terminated. */
	append_string_to_CharAEAE(descs, desc_line->ptr);
}
Exemple #8
0
/*
 * For each element of 'subject', collect the 1-based indices of the
 * elements of 'pattern' for which _match_pattern_XString() reports at
 * least one match. The result is a list with one integer vector per
 * subject element, built with new_LIST_from_IntAEAE().
 *
 * 'max_mismatch', 'min_mismatch', 'with_indels' and 'fixed' are forwarded
 * unchanged to _match_pattern_XString(); the first string of 'algorithm'
 * is forwarded as the algorithm name.
 */
static SEXP vwhich_XStringSet_XStringSet(SEXP pattern,
		SEXP subject,
		SEXP max_mismatch, SEXP min_mismatch,
		SEXP with_indels, SEXP fixed,
		SEXP algorithm)
{
	XStringSet_holder pat_holder, subj_holder;
	Chars_holder pat, subj;
	IntAEAE *hits;
	const char *algo_name;
	int npat, nsubj, p, s;

	pat_holder = _hold_XStringSet(pattern);
	npat = _get_length_from_XStringSet_holder(&pat_holder);
	subj_holder = _hold_XStringSet(subject);
	nsubj = _get_length_from_XStringSet_holder(&subj_holder);
	algo_name = CHAR(STRING_ELT(algorithm, 0));

	/* One (initially empty) buffer of pattern indices per subject. */
	hits = new_IntAEAE(nsubj, nsubj);
	for (s = 0; s < nsubj; s++) {
		IntAE_set_nelt(hits->elts[s], 0);
	}

	_init_match_reporting("MATCHES_AS_COUNTS", 1);
	for (p = 0; p < npat; p++) {
		pat = _get_elt_from_XStringSet_holder(&pat_holder, p);
		for (s = 0; s < nsubj; s++) {
			subj = _get_elt_from_XStringSet_holder(&subj_holder, s);
			_match_pattern_XString(&pat, &subj,
				max_mismatch, min_mismatch, with_indels, fixed,
				algo_name);
			if (_get_match_count() != 0) {
				IntAE_insert_at(hits->elts[s],
					IntAE_get_nelt(hits->elts[s]),
					p + 1);
			}
			/* Clear the reported matches before the next pair. */
			_drop_reported_matches();
		}
	}
	return new_LIST_from_IntAEAE(hits, 0);
}
Exemple #9
0
/*
 * Append the matches stored in 'match_buf2' to 'match_buf1', then flush
 * 'match_buf2' with _MatchBuf_flush().
 *
 * Does nothing if either buffer has ms_code MATCHES_AS_NULL. Raises an
 * error if the two buffers differ in the number of elements of their
 * 'match_counts' or in their ms_code.
 *
 * Match starts are appended with IntAE_append_shifted_vals(), passing
 * 'view_offset' as the shift; match widths are appended as is.
 */
void _MatchBuf_append_and_flush(MatchBuf *match_buf1,
		MatchBuf *match_buf2, int view_offset)
{
	IntAE *counts1, *counts2, *dest, *src;
	int npending, k, id, same_size;

	if (match_buf1->ms_code == MATCHES_AS_NULL
	 || match_buf2->ms_code == MATCHES_AS_NULL)
		return;

	counts1 = match_buf1->match_counts;
	counts2 = match_buf2->match_counts;
	same_size = IntAE_get_nelt(counts1) == IntAE_get_nelt(counts2);
	if (!same_size || match_buf1->ms_code != match_buf2->ms_code)
		error("Biostrings internal error in "
		      "_MatchBuf_append_and_flush(): "
		      "buffers are incompatible");

	npending = IntAE_get_nelt(match_buf2->PSlink_ids);
	for (k = 0; k < npending; k++) {
		id = match_buf2->PSlink_ids->elts[k];

		/* Register ids that had no match in 'match_buf1' so far. */
		if (counts1->elts[id] == 0) {
			IntAE_insert_at(match_buf1->PSlink_ids,
				IntAE_get_nelt(match_buf1->PSlink_ids),
				id);
		}
		counts1->elts[id] += counts2->elts[id];

		if (match_buf1->match_starts != NULL) {
			dest = match_buf1->match_starts->elts[id];
			src = match_buf2->match_starts->elts[id];
			IntAE_append_shifted_vals(dest,
				src->elts, IntAE_get_nelt(src),
				view_offset);
		}
		if (match_buf1->match_widths != NULL) {
			dest = match_buf1->match_widths->elts[id];
			src = match_buf2->match_widths->elts[id];
			IntAE_append(dest,
				src->elts, IntAE_get_nelt(src));
		}
	}
	_MatchBuf_flush(match_buf2);
}