/*
 * Append 'times' copies of the character 'c' at the end of 'char_ae'.
 * Nothing is appended when 'times' is zero or negative.
 */
static void CharAE_append_char(CharAE *char_ae, char c, int times)
{
	int remaining;

	/* Each insertion targets the current end of the buffer. */
	for (remaining = times; remaining > 0; remaining--)
		_CharAE_insert_at(char_ae, _CharAE_get_nelt(char_ae), c);
}
SEXP _new_RAW_from_CharAE(const CharAE *ae) { int nelt; SEXP ans; if (sizeof(Rbyte) != sizeof(char)) // should never happen! error("_new_RAW_from_CharAE(): sizeof(Rbyte) != sizeof(char)"); nelt = _CharAE_get_nelt(ae); PROTECT(ans = NEW_RAW(nelt)); memcpy(RAW(ans), ae->elts, sizeof(char) * nelt); UNPROTECT(1); return ans; }
/*
 * Append the characters of the NUL-terminated 'string' (terminator excluded)
 * at the end of 'ae', extending the buffer first if it is too small.
 */
void _append_string_to_CharAE(CharAE *ae, const char *string)
{
	int add_len, old_len, total_len;

	add_len = strlen(string);
	old_len = _CharAE_get_nelt(ae);
	total_len = old_len + add_len;
	/* Make room for the new characters before copying them. */
	if (total_len > ae->_buflength)
		CharAE_extend(ae, total_len);
	memcpy(ae->elts + old_len, string, sizeof(char) * add_len);
	_CharAE_set_nelt(ae, total_len);
}
/*
 * Remove 'nelt' consecutive elements from 'ae', the first one being at
 * position 'at'.
 * A single call _CharAE_delete_at(x, at, nelt) has the same effect as
 * calling _CharAE_delete_at(x, at, 1) 'nelt' times.
 */
void _CharAE_delete_at(CharAE *ae, int at, int nelt)
{
	int old_nelt, src, dst;

	if (nelt == 0)
		return;
	old_nelt = _CharAE_get_nelt(ae);
	/* Shift the tail (everything after the deleted range) to the left,
	   walking in increasing order so no byte is overwritten before it
	   has been read. */
	for (dst = at, src = at + nelt; src < old_nelt; dst++, src++)
		ae->elts[dst] = ae->elts[src];
	_CharAE_set_nelt(ae, old_nelt - nelt);
}
/*
 * Insert character 'c' in 'ae' at position 'at'. The elements currently at
 * positions >= 'at' are shifted by one position to the right and the number
 * of elements in 'ae' is incremented by 1.
 * If 'at' is greater than the current number of elements, nothing is shifted
 * and 'c' is simply stored at the end of the buffer.
 */
void _CharAE_insert_at(CharAE *ae, int at, char c)
{
	int nelt, i;

	nelt = _CharAE_get_nelt(ae);
	/* Buffer is full: grow it before writing one more element.
	   NOTE(review): -1 presumably asks CharAE_extend() to pick the new
	   length itself -- confirm against its definition. */
	if (nelt >= ae->_buflength)
		CharAE_extend(ae, -1);
	/* Shift the tail to the right, starting from the last element.
	   Indexing is used instead of a pair of walking pointers so that no
	   pointer to 'ae->elts - 1' is ever formed when the buffer is empty
	   (forming such a pointer is undefined behavior in C). */
	for (i = nelt; i > at; i--)
		ae->elts[i] = ae->elts[i - 1];
	/* Here 'i' is 'at' if at <= nelt, and 'nelt' otherwise. */
	ae->elts[i] = c;
	_CharAE_set_nelt(ae, nelt + 1);
}
/*
 * Build a new character vector with one string per element of 'aeae'.
 * Each string is made from the characters stored in the corresponding
 * CharAE buffer. The returned object is unprotected.
 */
SEXP _new_CHARACTER_from_CharAEAE(const CharAEAE *aeae)
{
	SEXP strings, chr;
	CharAE *buf;
	int n, k;

	n = _CharAEAE_get_nelt(aeae);
	PROTECT(strings = NEW_CHARACTER(n));
	k = 0;
	while (k < n) {
		buf = aeae->elts[k];
		/* The buffer length is passed explicitly to mkCharLen(). */
		PROTECT(chr = mkCharLen(buf->elts, _CharAE_get_nelt(buf)));
		SET_STRING_ELT(strings, k, chr);
		UNPROTECT(1);
		k++;
	}
	UNPROTECT(1);
	return strings;
}
/*
 * Build a new logical vector with one element per char stored in 'ae',
 * each char value being converted to int.
 * Only until we have a bitset or something smaller than char.
 */
SEXP _new_LOGICAL_from_CharAE(const CharAE *ae)
{
	SEXP lgl;
	int n, k, *dest;

	n = _CharAE_get_nelt(ae);
	PROTECT(lgl = NEW_LOGICAL(n));
	dest = LOGICAL(lgl);
	for (k = 0; k < n; k++)
		dest[k] = ae->elts[k];
	UNPROTECT(1);
	return lgl;
}
/* type: 0=CHARSXP, 1=STRSXP, 2=RAWSXP as_matrix: 0 or 1, ignored when type is 0 q_len, q_break, s_len: ignored when type is 0 */ static SEXP make_encoding_from_CharAE(const CharAE *buf, int type, int as_matrix, int q_len, int q_break, int s_len) { SEXP ans, ans_elt, ans_dim; int buf_nelt, i, nrow; buf_nelt = _CharAE_get_nelt(buf); if (type == 0 || (type == 1 && !as_matrix)) { PROTECT(ans = mkCharLen(buf->elts, buf_nelt)); if (type == 1) { PROTECT(ans = ScalarString(ans)); UNPROTECT(1); } UNPROTECT(1); return ans; } if (type == 1) { PROTECT(ans = NEW_CHARACTER(buf_nelt)); for (i = 0; i < buf_nelt; i++) { PROTECT(ans_elt = mkCharLen(buf->elts + i, 1)); SET_STRING_ELT(ans, i, ans_elt); UNPROTECT(1); } } else { PROTECT(ans = _new_RAW_from_CharAE(buf)); } if (as_matrix) { nrow = q_len; if (q_break != 0) nrow += 2; PROTECT(ans_dim = NEW_INTEGER(2)); INTEGER(ans_dim)[0] = nrow; INTEGER(ans_dim)[1] = s_len; SET_DIM(ans, ans_dim); UNPROTECT(1); } UNPROTECT(1); return ans; }
/*
 * Compute the overlap encoding of a query against a subject and append it
 * to 'out'.
 *
 *   q_start, q_width: int arrays of length q_len. No NAs.
 *   q_space: NULL or an int array of length q_len. No NAs.
 *   q_len: nb of ranges in the query.
 *   q_break: 0 if all the ranges in the query are coming from the same
 *       segment (single-end read), or, an int >= 1 and < q_len specifying
 *       the position of the break between the ranges coming from one
 *       segment and the ranges coming from the other if the query is a
 *       paired-end read.
 *   flip_query: if non-zero, then the query is "flipped" before the encoding
 *       is computed (ranges are visited in reverse order and the sign of
 *       their space is inverted).
 *   s_start, s_width: int arrays of length s_len. No NAs.
 *   s_space: NULL or an int array of length s_len. No NAs.
 *   s_len: nb of ranges in the subject.
 *   as_matrix, Loffset, Roffset: if as_matrix, then the full matrix of codes
 *       is returned and the returned values for Loffset and Roffset are
 *       undefined (they are not written). Otherwise, the matrix is trimmed
 *       and the returned values for Loffset and Roffset are the number of
 *       cols removed on the left and right sides of the matrix,
 *       respectively.
 *   out: character array containing the matrix of codes (possibly trimmed).
 *       When not as_matrix, the codes are preceded by a "<q_len>:" (or
 *       "<n1>--<n2>:" for a paired-end query) header and each remaining col
 *       is followed by ":".
 */
static void unsafe_overlap_encoding(
		const int *q_start, const int *q_width, const int *q_space,
		int q_len, int q_break, int flip_query,
		const int *s_start, const int *s_width, const int *s_space,
		int s_len,
		int as_matrix, int *Loffset, int *Roffset, CharAE *out)
{
	int body_offset = 0, col, k, row, q_gap, s_gap, at_break,
	    first_non_m, last_non_a, col_end, col_height;
	char code;

	if (!as_matrix) {
		/* Write the header: nb of query ranges (per segment if the
		   query is paired-end), followed by ":". */
		if (q_break == 0) {
			CharAE_append_int(out, q_len);
		} else {
			CharAE_append_int(out,
				flip_query ? q_len - q_break : q_break);
			CharAE_append_char(out, '-', 2);
			CharAE_append_int(out,
				flip_query ? q_break : q_len - q_break);
		}
		CharAE_append_char(out, ':', 1);
		/* Position in 'out' where the matrix of codes starts. */
		body_offset = _CharAE_get_nelt(out);
	}

	/* first_non_m: 0-based index of first (i.e. leftmost) col with a
	                non-"m", or 's_len' if there is no such col.
	   last_non_a:  0-based index of last (i.e. rightmost) col with a
	                non-"a", or -1 if there is no such col. */
	first_non_m = s_len;
	last_non_a = -1;

	/* Walk col by col (one col per subject range). */
	for (col = 0; col < s_len; col++) {
		s_gap = (s_space != NULL) ? s_space[col] : 0;
		for (k = 0; k < q_len; k++) {
			/* A flipped query is walked from its last range to
			   its first one. */
			row = flip_query ? q_len - 1 - k : k;
			at_break = (q_break != 0 && row == q_break);
			/* The "--" separator sits between range q_break - 1
			   and range q_break: emit it before the latter in
			   normal order... */
			if (at_break && !flip_query)
				CharAE_append_char(out, '-', 2);
			if (q_space == NULL)
				q_gap = 0;
			else
				q_gap = flip_query ? - q_space[row]
						   : q_space[row];
			code = overlap_letter(q_start[row], q_width[row], q_gap,
					      s_start[col], s_width[col], s_gap);
			CharAE_append_char(out, code, 1);
			if (first_non_m == s_len && code != 'm')
				first_non_m = col;
			if (code != 'a')
				last_non_a = col;
			/* ... and after it in flipped order. */
			if (at_break && flip_query)
				CharAE_append_char(out, '-', 2);
		}
	}
	if (as_matrix)
		return;

	/* By switching to a 1-based index for the last non-"a" col we get
	   0 <= first_non_m <= col_end <= s_len, which simplifies further
	   arithmetic/logic. A 0-row matrix needs special treatment. */
	col_end = (q_len == 0) ? s_len : last_non_a + 1;
	*Loffset = first_non_m;
	*Roffset = s_len - col_end;
	col_height = q_len;
	if (q_break != 0)
		col_height += 2;
	/* Remove "a"-cols on the right. */
	_CharAE_set_nelt(out, body_offset + col_end * col_height);
	/* Remove "m"-cols on the left. */
	_CharAE_delete_at(out, body_offset, first_non_m * col_height);
	/* Insert ":" at the end of each remaining col, going from right to
	   left so that pending insertion points are not shifted. */
	for (k = col_end - first_non_m; k >= 1; k--)
		_CharAE_insert_at(out, body_offset + k * col_height, ':');
}