/*
 * Create a new CharAE whose content is the characters of 'string'
 * (the terminating NUL is not stored).
 *
 * The number of bytes copied and the resulting element count are taken from
 * strlen(string) rather than from the capacity reported by the freshly
 * allocated buffer, so the copy can never read past the end of 'string'
 * even if _new_CharAE() were to allocate more than requested.
 */
CharAE *_new_CharAE_from_string(const char *string)
{
	CharAE *ae;
	size_t len;

	len = strlen(string);
	ae = _new_CharAE(len);
	_CharAE_set_nelt(ae, len);
	/* Skip the copy for an empty string: the destination buffer may not
	   have been allocated in that case. */
	if (len != 0)
		memcpy(ae->elts, string, len);
	return ae;
}
/* --- .Call ENTRY POINT ---
 * Compute the overlap encodings for the (query, subject) pairs designated
 * by 'query_hits' and 'subject_hits'.
 *
 * Same arguments as RangesList_encode_overlaps() plus:
 *   'query_hits', 'subject_hits': integer vectors of the same length,
 *       holding 1-based indices into the query and subject lists.
 *   'flip_query': logical vector of the same length as 'query_hits'.
 *
 * Returns the list assembled by make_LIST_from_ovenc_parts() from the
 * Loffset, Roffset and encoding vectors (one element per hit).
 *
 * Raises an R error if 'flip_query' is not a logical vector with one element
 * per hit, or if a hit index is NA or out of range.
 */
SEXP Hits_encode_overlaps(SEXP query_starts, SEXP query_widths,
			  SEXP query_spaces, SEXP query_breaks,
			  SEXP subject_starts, SEXP subject_widths,
			  SEXP subject_spaces,
			  SEXP query_hits, SEXP subject_hits, SEXP flip_query)
{
	int q_len, s_len, ans_len, i, j, k;
	const int *q_hits, *s_hits;
	SEXP ans_Loffset, ans_Roffset, ans_encoding, ans_encoding_elt, ans;
	CharAE buf;

	/* TODO: Add some basic checking of the remaining input values. */
	q_len = LENGTH(query_starts);
	s_len = LENGTH(subject_starts);
	ans_len = _check_integer_pairs(query_hits, subject_hits,
				       &q_hits, &s_hits,
				       "queryHits(hits)", "subjectHits(hits)");
	/* 'flip_query' is indexed by hit number below: validate it up front
	   (before anything is protected) so that a short or non-logical
	   vector cannot cause an out-of-bounds read. */
	if (!IS_LOGICAL(flip_query) || LENGTH(flip_query) != ans_len)
		error("'flip_query' must be a logical vector of the same "
		      "length as 'query_hits'");
	PROTECT(ans_Loffset = NEW_INTEGER(ans_len));
	PROTECT(ans_Roffset = NEW_INTEGER(ans_len));
	PROTECT(ans_encoding = NEW_CHARACTER(ans_len));
	buf = _new_CharAE(0);
	for (k = 0; k < ans_len; k++) {
		i = q_hits[k];
		j = s_hits[k];
		if (i == NA_INTEGER || i < 1 || i > q_len ||
		    j == NA_INTEGER || j < 1 || j > s_len) {
			UNPROTECT(3);
			error("'queryHits(hits)' or 'subjectHits(hits)' "
			      "contain invalid indices");
		}
		/* Switch from 1-based hit indices to 0-based C indices. */
		i--;
		j--;
		PROTECT(ans_encoding_elt = RangesList_encode_overlaps_ij(
				query_starts, query_widths,
				query_spaces, query_breaks,
				subject_starts, subject_widths,
				subject_spaces,
				i, j, LOGICAL(flip_query)[k],
				INTEGER(ans_Loffset) + k,
				INTEGER(ans_Roffset) + k,
				&buf));
		SET_STRING_ELT(ans_encoding, k, ans_encoding_elt);
		UNPROTECT(1);
		/* Empty the scratch buffer so it can be reused for the next
		   hit. */
		_CharAE_set_nelt(&buf, 0);
	}
	PROTECT(ans = make_LIST_from_ovenc_parts(ans_Loffset, ans_Roffset,
						 ans_encoding));
	UNPROTECT(4);
	return ans;
}
/*
 * Append the characters of 'string' (terminating NUL excluded) after the
 * current elements of 'ae'. CharAE_extend() is called first when the new
 * element count exceeds the current buffer length.
 */
void _append_string_to_CharAE(CharAE *ae, const char *string)
{
	int added, old_nelt, total;

	added = strlen(string);
	old_nelt = _CharAE_get_nelt(ae);
	total = old_nelt + added;
	if (total > ae->_buflength)
		CharAE_extend(ae, total);
	/* 'ae->elts' must be read after the possible extension above. */
	memcpy(ae->elts + old_nelt, string, sizeof(char) * added);
	_CharAE_set_nelt(ae, total);
}
/*
 * Remove 'nelt' elements from 'ae', starting at position 'at'. The elements
 * located after the removed range are shifted down to close the gap and the
 * element count is reduced by 'nelt'.
 * Doing _CharAE_delete_at(x, at, nelt) is equivalent to doing
 * _CharAE_delete_at(x, at, 1) 'nelt' times.
 */
void _CharAE_delete_at(CharAE *ae, int at, int nelt)
{
	char *elts;
	int old_nelt, dst, src;

	if (nelt == 0)
		return;
	elts = ae->elts;
	old_nelt = _CharAE_get_nelt(ae);
	/* Copy front to back, one element at a time. */
	for (dst = at, src = at + nelt; src < old_nelt; dst++, src++)
		elts[dst] = elts[src];
	_CharAE_set_nelt(ae, old_nelt - nelt);
}
/*
 * Insert character 'c' in 'ae' at position 'at'. The elements located at
 * 'at' and beyond are shifted up by one slot and the element count is
 * incremented.
 */
void _CharAE_insert_at(CharAE *ae, int at, char c)
{
	int old_nelt, pos;
	char *elts;

	old_nelt = _CharAE_get_nelt(ae);
	/* No free slot left: ask for a bigger buffer (presumably -1 lets
	   CharAE_extend() pick the new size -- confirm against its
	   definition). */
	if (old_nelt >= ae->_buflength)
		CharAE_extend(ae, -1);
	/* 'ae->elts' must be read after the possible extension above. */
	elts = ae->elts;
	/* Shift back to front so that no element is overwritten before it
	   has been moved. */
	for (pos = old_nelt; pos > at; pos--)
		elts[pos] = elts[pos - 1];
	elts[pos] = c;
	_CharAE_set_nelt(ae, old_nelt + 1);
}
/* --- .Call ENTRY POINT ---
 * 'query_starts', 'query_widths', 'query_spaces': lists of integer vectors.
 *     The 3 lists are assumed to have the same length (M) and shape.
 * 'query_breaks': NULL or integer vector of length M.
 * 'subject_starts', 'subject_widths', 'subject_spaces': lists of integer
 *     vectors. The 3 lists are assumed to have the same length (N) and
 *     shape.
 * The query and subject lists are walked in parallel. The result has
 * max(M, N) elements (0 if M or N is 0), the shorter side being recycled.
 * A warning is issued when the longer length is not a multiple of the
 * shorter one.
 * Return a named list with the 3 following components (all of the same
 * length):
 *     1. Loffset: integer vector;
 *     2. Roffset: integer vector;
 *     3. encoding: character vector containing the compact encodings (type
 *        II).
 */
SEXP RangesList_encode_overlaps(SEXP query_starts, SEXP query_widths,
				SEXP query_spaces, SEXP query_breaks,
				SEXP subject_starts, SEXP subject_widths,
				SEXP subject_spaces)
{
	int q_len, s_len, ans_len, i, j, k;
	SEXP ans_Loffset, ans_Roffset, ans_encoding, elt, ans;
	CharAE buf;

	/* TODO: Add some basic checking of the input values. */
	q_len = LENGTH(query_starts);
	s_len = LENGTH(subject_starts);
	if (q_len == 0 || s_len == 0)
		ans_len = 0;
	else if (q_len >= s_len)
		ans_len = q_len;
	else
		ans_len = s_len;
	PROTECT(ans_Loffset = NEW_INTEGER(ans_len));
	PROTECT(ans_Roffset = NEW_INTEGER(ans_len));
	PROTECT(ans_encoding = NEW_CHARACTER(ans_len));
	buf = _new_CharAE(0);
	i = 0;
	j = 0;
	for (k = 0; k < ans_len; k++) {
		if (i >= q_len)
			i = 0; /* recycle i */
		if (j >= s_len)
			j = 0; /* recycle j */
		elt = RangesList_encode_overlaps_ij(
				query_starts, query_widths,
				query_spaces, query_breaks,
				subject_starts, subject_widths,
				subject_spaces,
				i, j, 0,
				INTEGER(ans_Loffset) + k,
				INTEGER(ans_Roffset) + k,
				&buf);
		PROTECT(elt);
		SET_STRING_ELT(ans_encoding, k, elt);
		UNPROTECT(1);
		/* Empty the scratch buffer so it can be reused for the next
		   pair. */
		_CharAE_set_nelt(&buf, 0);
		i++;
		j++;
	}
	/* After the loop, 'i' and 'j' both sit exactly at the end of their
	   list only when the longer length is a multiple of the shorter. */
	if (ans_len != 0 && (i != q_len || j != s_len))
		warning("longer object length is not a multiple "
			"of shorter object length");
	PROTECT(ans = make_LIST_from_ovenc_parts(ans_Loffset, ans_Roffset,
						 ans_encoding));
	UNPROTECT(4);
	return ans;
}
/*
 * Append to 'out' the overlap codes of a query against a subject.
 *
 * q_start, q_width: int arrays of length q_len. No NAs.
 * q_space: NULL or an int array of length q_len. No NAs.
 * q_len: nb of ranges in the query.
 * q_break: 0 if all the ranges in the query are coming from the same
 *          segment (single-end read), or, an int >= 1 and < q_len specifying
 *          the position of the break between the ranges coming from one
 *          segment and the ranges coming from the other if the query is a
 *          paired-end read.
 * flip_query: if non-zero, then the query is "flipped" before the encoding
 *          is computed (its ranges are walked in reverse order and its
 *          spaces are negated).
 * s_start, s_width: int arrays of length s_len. No NAs.
 * s_space: NULL or an int array of length s_len. No NAs.
 * s_len: nb of ranges in the subject.
 * as_matrix, Loffset, Roffset: if as_matrix, then the full matrix of codes
 *          is returned and 'Loffset' and 'Roffset' are not written.
 *          Otherwise, a header (nb of rows followed by ":") is emitted
 *          first, the matrix is trimmed, each remaining col is terminated
 *          by ":", and the values stored in Loffset and Roffset are the
 *          number of cols removed on the left and right sides of the
 *          matrix, respectively.
 * out: character array receiving the matrix of codes (possibly trimmed),
 *          stored col by col.
 */
static void unsafe_overlap_encoding(
		const int *q_start, const int *q_width, const int *q_space,
		int q_len, int q_break, int flip_query,
		const int *s_start, const int *s_width, const int *s_space,
		int s_len,
		int as_matrix, int *Loffset, int *Roffset, CharAE *out)
{
	int body_start, row, col, first_non_m, right_end, col_height,
	    ncol_kept, s_pos, s_wid, s_gap, q_gap;
	char code;

	/* Only meaningful (and only used) when '!as_matrix'. */
	body_start = 0;
	if (!as_matrix) {
		/* Header: nb of rows, written as "<n1>--<n2>" for a
		   paired-end query, followed by ":". */
		if (q_break == 0) {
			CharAE_append_int(out, q_len);
		} else {
			int before_sep, after_sep;

			if (flip_query) {
				before_sep = q_len - q_break;
				after_sep = q_break;
			} else {
				before_sep = q_break;
				after_sep = q_len - q_break;
			}
			CharAE_append_int(out, before_sep);
			CharAE_append_char(out, '-', 2);
			CharAE_append_int(out, after_sep);
		}
		CharAE_append_char(out, ':', 1);
		body_start = _CharAE_get_nelt(out);
	}
	/* first_non_m: 0-based index of first (i.e. leftmost) col with a
	       non-"m", or 's_len' if there is no such col.
	   right_end: 0-based index of last (i.e. rightmost) col with a
	       non-"a", or -1 if there is no such col. */
	first_non_m = s_len;
	right_end = -1;
	/* Walk col by col. */
	for (col = 0; col < s_len; col++) {
		s_pos = s_start[col];
		s_wid = s_width[col];
		s_gap = 0;
		if (s_space != NULL)
			s_gap = s_space[col];
		if (flip_query) {
			/* Flipped query: rows are emitted last to first, the
			   "--" separator coming right after row 'q_break'. */
			for (row = q_len - 1; row >= 0; row--) {
				q_gap = 0;
				if (q_space != NULL)
					q_gap = - q_space[row];
				code = overlap_letter(q_start[row],
						      q_width[row], q_gap,
						      s_pos, s_wid, s_gap);
				CharAE_append_char(out, code, 1);
				if (code != 'm' && first_non_m == s_len)
					first_non_m = col;
				if (code != 'a')
					right_end = col;
				if (row == q_break && q_break != 0)
					CharAE_append_char(out, '-', 2);
			}
		} else {
			/* Rows are emitted first to last, the "--" separator
			   coming right before row 'q_break'. */
			for (row = 0; row < q_len; row++) {
				if (row == q_break && q_break != 0)
					CharAE_append_char(out, '-', 2);
				q_gap = 0;
				if (q_space != NULL)
					q_gap = q_space[row];
				code = overlap_letter(q_start[row],
						      q_width[row], q_gap,
						      s_pos, s_wid, s_gap);
				CharAE_append_char(out, code, 1);
				if (code != 'm' && first_non_m == s_len)
					first_non_m = col;
				if (code != 'a')
					right_end = col;
			}
		}
	}
	if (as_matrix)
		return;
	/* By making 'right_end' a 1-based index we will then have
	   0 <= first_non_m <= right_end <= s_len, which will simplify
	   further arithmetic/logic. A 0-row matrix needs special
	   treatment. */
	if (q_len != 0)
		right_end++;
	else
		right_end = s_len;
	*Loffset = first_non_m;
	*Roffset = s_len - right_end;
	/* Nb of chars per col: one per row, plus the "--" separator if any. */
	col_height = q_len;
	if (q_break != 0)
		col_height += 2;
	/* Remove "a"-cols on the right. */
	_CharAE_set_nelt(out, body_start + right_end * col_height);
	/* Remove "m"-cols on the left. */
	_CharAE_delete_at(out, body_start, first_non_m * col_height);
	/* Insert ":" at the end of each remaining col, working right to left
	   so that the insertion positions still to come are not shifted. */
	ncol_kept = right_end - first_non_m;
	while (ncol_kept >= 1) {
		_CharAE_insert_at(out, body_start + ncol_kept * col_height,
				  ':');
		ncol_kept--;
	}
}