Beispiel #1
0
/*
 * Build a new CharAE buffer holding the characters of 'string'.
 * The buffer is created with a capacity of strlen(string); its element
 * count is then set to that capacity and the characters are copied in.
 * The terminating '\0' is NOT stored in the buffer.
 */
CharAE *_new_CharAE_from_string(const char *string)
{
	CharAE *result = _new_CharAE(strlen(string));

	/* The buffer is filled completely: nelt == capacity. */
	_CharAE_set_nelt(result, result->_buflength);
	memcpy(result->elts, string, result->_buflength);
	return result;
}
/* --- .Call ENTRY POINT ---
 * Same arguments as RangesList_encode_overlaps() plus:
 * 'query_hits', 'subject_hits': integer vectors of the same length.
 *     They hold 1-based indices into the query and subject lists,
 *     respectively. NAs and out-of-range indices raise an error.
 * 'flip_query': logical vector of the same length as 'query_hits'.
 *     An error is raised if it is shorter than 'query_hits', because one
 *     element is read for each hit.
 * Return the value produced by make_LIST_from_ovenc_parts() from the
 * Loffset, Roffset and encoding vectors (one element per hit).
 */
SEXP Hits_encode_overlaps(SEXP query_starts, SEXP query_widths,
			  SEXP query_spaces, SEXP query_breaks,
			  SEXP subject_starts, SEXP subject_widths,
			  SEXP subject_spaces,
			  SEXP query_hits, SEXP subject_hits, SEXP flip_query)
{
	int q_len, s_len, ans_len, i, j, k;
	const int *q_hits, *s_hits;
	SEXP ans_Loffset, ans_Roffset, ans_encoding, ans_encoding_elt, ans;
	CharAE buf;

	/* TODO: Add some more checking of the input values. */
	q_len = LENGTH(query_starts);
	s_len = LENGTH(subject_starts);
	ans_len = _check_integer_pairs(query_hits, subject_hits,
				       &q_hits, &s_hits,
				       "queryHits(hits)", "subjectHits(hits)");
	/* 'flip_query' is indexed with k in [0, ans_len) below, so a shorter
	   vector would be read out of bounds. Nothing is protected yet, so
	   raising the error here needs no UNPROTECT(). Longer vectors are
	   tolerated (extra elements are ignored). */
	if (LENGTH(flip_query) < ans_len)
		error("'flip_query' is shorter than 'queryHits(hits)'");
	PROTECT(ans_Loffset = NEW_INTEGER(ans_len));
	PROTECT(ans_Roffset = NEW_INTEGER(ans_len));
	PROTECT(ans_encoding = NEW_CHARACTER(ans_len));
	buf = _new_CharAE(0);
	for (k = 0; k < ans_len; k++) {
		i = q_hits[k];
		j = s_hits[k];
		if (i == NA_INTEGER || i < 1 || i > q_len ||
		    j == NA_INTEGER || j < 1 || j > s_len) {
			UNPROTECT(3);
			error("'queryHits(hits)' or 'subjectHits(hits)' "
			      "contain invalid indices");
		}
		/* Turn the 1-based hit indices into 0-based offsets. */
		i--;
		j--;
		PROTECT(ans_encoding_elt = RangesList_encode_overlaps_ij(
				query_starts, query_widths,
				query_spaces, query_breaks,
				subject_starts, subject_widths, subject_spaces,
				i, j, LOGICAL(flip_query)[k],
				INTEGER(ans_Loffset) + k,
				INTEGER(ans_Roffset) + k,
				&buf));
		SET_STRING_ELT(ans_encoding, k, ans_encoding_elt);
		UNPROTECT(1);
		/* Empty the scratch buffer so it can be reused for the next
		   hit. */
		_CharAE_set_nelt(&buf, 0);
	}
	PROTECT(ans = make_LIST_from_ovenc_parts(ans_Loffset, ans_Roffset,
						 ans_encoding));
	UNPROTECT(4);
	return ans;
}
Beispiel #3
0
/*
 * Append the characters of 'string' (without its terminating '\0') at the
 * end of 'ae'. CharAE_extend() is called first when the current capacity
 * is too small to hold the result.
 */
void _append_string_to_CharAE(CharAE *ae, const char *string)
{
	int extra = strlen(string);
	int used = _CharAE_get_nelt(ae);
	int total = used + extra;

	if (total > ae->_buflength)
		CharAE_extend(ae, total);
	/* 'ae->elts' is read only after the possible extension above. */
	memcpy(ae->elts + used, string, sizeof(char) * extra);
	_CharAE_set_nelt(ae, total);
}
Beispiel #4
0
/*
 * Remove 'nelt' consecutive elements from 'ae', the first one being at
 * position 'at'. The elements located after the removed run are shifted
 * towards the front and the element count is decreased by 'nelt'.
 * _CharAE_delete_at(x, at, nelt) gives the same result as calling
 * _CharAE_delete_at(x, at, 1) 'nelt' times.
 */
void _CharAE_delete_at(CharAE *ae, int at, int nelt)
{
	int old_nelt, src;

	if (nelt == 0)
		return;
	old_nelt = _CharAE_get_nelt(ae);
	/* Move each surviving element 'nelt' slots to the left. */
	for (src = at + nelt; src < old_nelt; src++)
		ae->elts[src - nelt] = ae->elts[src];
	_CharAE_set_nelt(ae, old_nelt - nelt);
}
Beispiel #5
0
/*
 * Insert character 'c' in 'ae' at position 'at'. The elements currently
 * at positions >= 'at' are shifted one slot to the right and the element
 * count is increased by 1. CharAE_extend() is called first when the
 * buffer is full.
 */
void _CharAE_insert_at(CharAE *ae, int at, char c)
{
	int old_nelt, pos;
	char *elts;

	old_nelt = _CharAE_get_nelt(ae);
	if (old_nelt >= ae->_buflength)
		CharAE_extend(ae, -1);
	/* Read 'elts' only after the possible extension above. */
	elts = ae->elts;
	/* Open a gap by shifting the tail, starting from the last element. */
	for (pos = old_nelt; pos > at; pos--)
		elts[pos] = elts[pos - 1];
	elts[pos] = c;
	_CharAE_set_nelt(ae, old_nelt + 1);
}
/* --- .Call ENTRY POINT ---
 * 'query_starts', 'query_widths', 'query_spaces': lists of integer vectors.
 * The 3 lists are assumed to have the same length (M) and shape.
 * 'query_breaks': NULL or integer vector of length M.
 * 'subject_starts', 'subject_widths', 'subject_spaces': lists of integer
 * vectors. The 3 lists are assumed to have the same length (N) and shape.
 * The query and the subject are walked in parallel; the shorter of the two
 * is recycled, and a warning is issued when the longer length is not a
 * multiple of the shorter one. The result has length 0 if M or N is 0,
 * and length max(M, N) otherwise.
 * Return a named list with the 3 following components (all of the same
 * length):
 *     1. Loffset: integer vector;
 *     2. Roffset: integer vector;
 *     3. encoding: character vector containing the compact encodings (type
 *        II).
 */
SEXP RangesList_encode_overlaps(SEXP query_starts, SEXP query_widths,
				SEXP query_spaces, SEXP query_breaks,
				SEXP subject_starts, SEXP subject_widths,
				SEXP subject_spaces)
{
	int q_len, s_len, ans_len, q_idx, s_idx, k;
	SEXP ans_Loffset, ans_Roffset, ans_encoding, encoding_elt, ans;
	CharAE buf;

	/* TODO: Add some basic checking of the input values. */
	q_len = LENGTH(query_starts);
	s_len = LENGTH(subject_starts);
	ans_len = 0;
	if (q_len != 0 && s_len != 0)
		ans_len = s_len > q_len ? s_len : q_len;
	PROTECT(ans_Loffset = NEW_INTEGER(ans_len));
	PROTECT(ans_Roffset = NEW_INTEGER(ans_len));
	PROTECT(ans_encoding = NEW_CHARACTER(ans_len));
	buf = _new_CharAE(0);
	q_idx = s_idx = 0;
	for (k = 0; k < ans_len; k++) {
		/* Wrap around (recycle) whichever side ran out. */
		if (q_idx >= q_len)
			q_idx = 0;
		if (s_idx >= s_len)
			s_idx = 0;
		PROTECT(encoding_elt = RangesList_encode_overlaps_ij(
				query_starts, query_widths,
				query_spaces, query_breaks,
				subject_starts, subject_widths, subject_spaces,
				q_idx, s_idx, 0,
				INTEGER(ans_Loffset) + k,
				INTEGER(ans_Roffset) + k,
				&buf));
		SET_STRING_ELT(ans_encoding, k, encoding_elt);
		UNPROTECT(1);
		/* Empty the scratch buffer before the next iteration. */
		_CharAE_set_nelt(&buf, 0);
		q_idx++;
		s_idx++;
	}
	/* Both cursors end exactly at their length only when no partial
	   recycling happened. */
	if (ans_len != 0 && (q_idx != q_len || s_idx != s_len))
		warning("longer object length is not a multiple "
			"of shorter object length");
	PROTECT(ans = make_LIST_from_ovenc_parts(ans_Loffset, ans_Roffset,
						 ans_encoding));
	UNPROTECT(4);
	return ans;
}
/*
 * Compute the overlap codes of a query against a subject and append them
 * to 'out'. One code (a letter returned by overlap_letter()) is produced
 * for each (query range, subject range) pair, walking the subject ranges
 * (cols) in the outer loop and the query ranges (rows) in the inner loop.
 *
 * NOTE(review): no argument is validated here, which is presumably why the
 * function is called "unsafe" -- callers are expected to pass consistent
 * arrays and lengths.
 *
 * q_start, q_width: int arrays of length q_len. No NAs.
 * q_space: NULL or an int array of length q_len. No NAs.
 * q_len: nb of ranges in the query.
 * q_break: 0 if all the ranges in the query are coming from the same
 *         segment (single-end read), or, an int >= 1 and < q_len specifying
 *         the position of the break between the ranges coming from one
 *         segment and the ranges coming from the other if the query is a
 *         paired-end read.
 * flip_query: if non-zero, then the query is "flipped" before the encoding is
 *         computed.
 * s_start, s_width: int arrays of length s_len. No NAs.
 * s_space: NULL or an int array of length s_len. No NAs.
 * s_len: nb of ranges in the subject.
 * as_matrix, Loffset, Roffset: if as_matrix, then the full matrix of codes
 *         is returned and the returned values for Loffset and Roffset are
 *         undefined. Otherwise, the matrix is trimmed and the returned values
 *         for Loffset and Roffset are the number of cols removed on the left
 *         and right sides of the matrix, respectively.
 * out: character array containing the matrix of codes (possibly trimmed)
 */
static void unsafe_overlap_encoding(
		const int *q_start, const int *q_width, const int *q_space,
		int q_len,
		int q_break, int flip_query,
		const int *s_start, const int *s_width, const int *s_space,
		int s_len,
		int as_matrix, int *Loffset, int *Roffset, CharAE *out)
{
	int out_nelt0, i, starti, widthi, spacei, j, startj, widthj, spacej,
	    j1, j2, nrow;
	char letter;

	/* When not in matrix mode, the codes are preceded by a header made of
	   the nb of query ranges (or of the 2 segment sizes separated by a
	   break marker if the query has a break; the sizes are swapped when
	   the query is flipped) followed by ':'. */
	if (!as_matrix) {
		if (q_break != 0) {
			if (flip_query) {
				CharAE_append_int(out, q_len - q_break);
				CharAE_append_char(out, '-', 2);
				CharAE_append_int(out, q_break);
			} else {
				CharAE_append_int(out, q_break);
				CharAE_append_char(out, '-', 2);
				CharAE_append_int(out, q_len - q_break);
			}
		} else {
			CharAE_append_int(out, q_len);
		}
		CharAE_append_char(out, ':', 1);
		/* Position in 'out' where the matrix of codes starts. Only
		   set (and only used) when 'as_matrix' is 0. */
		out_nelt0 = _CharAE_get_nelt(out);
	}
	/* j1: 0-based index of first (i.e. leftmost) col with a non-"m",
	       or 's_len' if there is no such col.
	   j2: 0-based index of last (i.e. rightmost) col with a non-"a",
	       or -1 if there is no such col. */
	j1 = s_len;
	j2 = -1;
	/* Walk col by col. */
	for (j = 0; j < s_len; j++) {
		startj = s_start[j];
		widthj = s_width[j];
		spacej = s_space == NULL ? 0 : s_space[j];
		if (flip_query) {
			/* Flipped query: the rows are walked in reverse order
			   and the sign of the query space is inverted. */
			for (i = q_len - 1; i >= 0; i--) {
				starti = q_start[i];
				widthi = q_width[i];
				spacei = q_space == NULL ? 0 : - q_space[i];
				letter = overlap_letter(starti, widthi, spacei,
							startj, widthj, spacej);
				CharAE_append_char(out, letter, 1);
				if (j1 == s_len && letter != 'm')
					j1 = j;
				if (letter != 'a')
					j2 = j;
				/* Rows are emitted in reverse order, so the
				   break marker goes right after row
				   'q_break'. */
				if (q_break != 0 && i == q_break)
					CharAE_append_char(out, '-', 2);
			}
		} else {
			for (i = 0; i < q_len; i++) {
				/* The break marker goes right before row
				   'q_break'. */
				if (q_break != 0 && i == q_break)
					CharAE_append_char(out, '-', 2);
				starti = q_start[i];
				widthi = q_width[i];
				spacei = q_space == NULL ? 0 : q_space[i];
				letter = overlap_letter(starti, widthi, spacei,
							startj, widthj, spacej);
				CharAE_append_char(out, letter, 1);
				if (j1 == s_len && letter != 'm')
					j1 = j;
				if (letter != 'a')
					j2 = j;
			}
		}
	}
	/* In matrix mode the full matrix is returned as-is: no trimming, and
	   'Loffset' and 'Roffset' are left untouched. */
	if (as_matrix)
		return;
	/* By making 'j2' a 1-based index we will then have
	   0 <= j1 <= j2 <= s_len, which will simplify further
	   arithmetic/logic. */
	if (q_len == 0) {
		/* A 0-row matrix needs special treatment. */
		j2 = s_len;
	} else {
		j2++;
	}
	*Loffset = j1;
	*Roffset = s_len - j2;
	/* Nb of chars per col: one per query range, plus 2 for the break
	   marker if any (assumes CharAE_append_char(out, '-', 2) appends
	   2 chars -- TODO confirm). */
	nrow = q_len;
	if (q_break != 0)
		nrow += 2;
	/* Remove "a"-cols on the right. */
	_CharAE_set_nelt(out, out_nelt0 + j2 * nrow);
	/* Remove "m"-cols on the left. */
	_CharAE_delete_at(out, out_nelt0, j1 * nrow);
	/* Insert ":" at the end of each remaining col. Going from the last
	   col to the first keeps the insertion positions of the cols not
	   yet processed unchanged. */
	for (j = j2 - j1; j >= 1; j--)
		_CharAE_insert_at(out, out_nelt0 + j * nrow, ':');
	return;
}