static SEXP make_fasta_index_data_frame(const IntAE *recno_buf, const IntAE *fileno_buf, const LLongAE *offset_buf, const CharAEAE *desc_buf, const IntAE *seqlength_buf) { SEXP df, colnames, tmp; int i; PROTECT(df = NEW_LIST(5)); PROTECT(colnames = NEW_CHARACTER(5)); PROTECT(tmp = mkChar("recno")); SET_STRING_ELT(colnames, 0, tmp); UNPROTECT(1); PROTECT(tmp = mkChar("fileno")); SET_STRING_ELT(colnames, 1, tmp); UNPROTECT(1); PROTECT(tmp = mkChar("offset")); SET_STRING_ELT(colnames, 2, tmp); UNPROTECT(1); PROTECT(tmp = mkChar("desc")); SET_STRING_ELT(colnames, 3, tmp); UNPROTECT(1); PROTECT(tmp = mkChar("seqlength")); SET_STRING_ELT(colnames, 4, tmp); UNPROTECT(1); SET_NAMES(df, colnames); UNPROTECT(1); PROTECT(tmp = new_INTEGER_from_IntAE(recno_buf)); SET_ELEMENT(df, 0, tmp); UNPROTECT(1); PROTECT(tmp = new_INTEGER_from_IntAE(fileno_buf)); SET_ELEMENT(df, 1, tmp); UNPROTECT(1); PROTECT(tmp = NEW_NUMERIC(LLongAE_get_nelt(offset_buf))); for (i = 0; i < LENGTH(tmp); i++) REAL(tmp)[i] = (double) offset_buf->elts[i]; SET_ELEMENT(df, 2, tmp); UNPROTECT(1); PROTECT(tmp = new_CHARACTER_from_CharAEAE(desc_buf)); SET_ELEMENT(df, 3, tmp); UNPROTECT(1); PROTECT(tmp = new_INTEGER_from_IntAE(seqlength_buf)); SET_ELEMENT(df, 4, tmp); UNPROTECT(1); /* list_as_data_frame() performs IN-PLACE coercion */ list_as_data_frame(df, IntAE_get_nelt(recno_buf)); UNPROTECT(1); return df; }
/* --- .Call ENTRY POINT --- */
/*
 * Build an index of the records found in a list of FASTA files.
 *
 * filexp_list:    list with one element per FASTA file; each element is
 *                 handed to parse_FASTA_file(). The names of the list are
 *                 used to identify the file in error/warning messages.
 * nrec, skip:     integer vectors; only the first element of each is read
 *                 and forwarded to parse_FASTA_file().
 * seek_first_rec: logical vector; only the first element is read and
 *                 forwarded to parse_FASTA_file().
 * lkup:           forwarded to new_FASTAINDEX_loader().
 *
 * Returns the data frame built by make_fasta_index_data_frame() from the
 * buffers filled during parsing, plus a "fileno" buffer holding the 1-based
 * position in 'filexp_list' of the file each record came from.
 *
 * Raises an R error if parse_FASTA_file() returns a non-NULL message, and
 * an R warning if it reports a non-zero count of invalid letters.
 */
SEXP fasta_index(SEXP filexp_list, SEXP nrec, SEXP skip,
		 SEXP seek_first_rec, SEXP lkup)
{
	int nrec0, skip0, seek_rec0, i, recno, old_nrec, new_nrec, k;
	FASTAINDEX_loaderExt loader_ext;
	FASTAloader loader;
	IntAE *seqlength_buf, *fileno_buf;
	SEXP filexp;
	long long int offset, ninvalid;
	const char *errmsg;

	nrec0 = INTEGER(nrec)[0];
	skip0 = INTEGER(skip)[0];
	seek_rec0 = LOGICAL(seek_first_rec)[0];
	loader_ext = new_FASTAINDEX_loaderExt();
	loader = new_FASTAINDEX_loader(lkup, 1, &loader_ext);
	seqlength_buf = loader_ext.seqlength_buf;
	fileno_buf = new_IntAE(0, 0, 0);
	/* 'recno' is initialized once and the same counter is passed to the
	   parser for every file; 'offset' and 'ninvalid' are reset per file. */
	for (i = recno = 0; i < LENGTH(filexp_list); i++) {
		filexp = VECTOR_ELT(filexp_list, i);
		offset = ninvalid = 0LL;
		errmsg = parse_FASTA_file(filexp, nrec0, skip0, seek_rec0,
					  &loader, &recno, &offset, &ninvalid);
		/* Report the message actually returned by the parser rather
		   than relying on it being stored in a particular buffer. */
		if (errmsg != NULL)
			error("reading FASTA file %s: %s",
			      CHAR(STRING_ELT(GET_NAMES(filexp_list), i)),
			      errmsg);
		if (ninvalid != 0LL)
			warning("reading FASTA file %s: ignored %lld "
				"invalid one-letter sequence codes",
				CHAR(STRING_ELT(GET_NAMES(filexp_list), i)),
				ninvalid);
		/* Every record appended to 'seqlength_buf' while parsing this
		   file gets the 1-based file number in 'fileno_buf'. */
		old_nrec = IntAE_get_nelt(fileno_buf);
		new_nrec = IntAE_get_nelt(seqlength_buf);
		for (k = old_nrec; k < new_nrec; k++)
			IntAE_insert_at(fileno_buf, k, i + 1);
	}
	return make_fasta_index_data_frame(loader_ext.recno_buf,
					   fileno_buf,
					   loader_ext.offset_buf,
					   loader_ext.desc_buf,
					   seqlength_buf);
}
/*
 * Loader callback: append a new sequence length of 0 at the end of the
 * loader extension's 'seqlength_buf'.
 */
static void FASTAINDEX_load_empty_seq(FASTAloader *loader)
{
	FASTAINDEX_loaderExt *ext = loader->ext;
	IntAE *lengths = ext->seqlength_buf;

	IntAE_insert_at(lengths, IntAE_get_nelt(lengths), 0);
}
/*
 * Record one match for the pattern/subject pair 'PSpair_id'.
 *
 * The pair's match count is incremented; if it was 0 before, the pair id is
 * appended to 'PSlink_ids'. When the buffer has 'match_starts' and/or
 * 'match_widths', 'start' and/or 'width' are appended to the pair's
 * corresponding buffer.
 */
void _MatchBuf_report_match(MatchBuf *match_buf,
		int PSpair_id, int start, int width)
{
	IntAE *ids = match_buf->PSlink_ids;
	IntAE *counts = match_buf->match_counts;
	IntAE *pair_buf;
	int prev_count;

	/* First match for this pair: remember that the pair has matches. */
	prev_count = counts->elts[PSpair_id]++;
	if (prev_count == 0)
		IntAE_insert_at(ids, IntAE_get_nelt(ids), PSpair_id);

	if (match_buf->match_starts != NULL) {
		pair_buf = match_buf->match_starts->elts[PSpair_id];
		IntAE_insert_at(pair_buf, IntAE_get_nelt(pair_buf), start);
	}

	if (match_buf->match_widths != NULL) {
		pair_buf = match_buf->match_widths->elts[PSpair_id];
		IntAE_insert_at(pair_buf, IntAE_get_nelt(pair_buf), width);
	}
}
/*
 * Loader callback: add the length of 'seq_data' to the last element of the
 * loader extension's 'seqlength_buf'.
 *
 * NOTE(review): assumes 'seqlength_buf' already holds at least one element
 * (presumably added by FASTAINDEX_load_empty_seq()) -- confirm with the
 * parser's call order.
 */
static void FASTAINDEX_load_seq_data(FASTAloader *loader,
		const Chars_holder *seq_data)
{
	FASTAINDEX_loaderExt *ext = loader->ext;
	IntAE *lengths = ext->seqlength_buf;
	int *last_length = lengths->elts + (IntAE_get_nelt(lengths) - 1);

	*last_length += seq_data->length;
}
/*
 * Reset 'match_buf' to the "no matches" state.
 *
 * Only the pairs listed in 'PSlink_ids' are visited: their count is set
 * back to 0 and, when present, their start/width buffers are emptied.
 * 'PSlink_ids' itself is emptied last.
 */
void _MatchBuf_flush(MatchBuf *match_buf)
{
	IntAE *ids = match_buf->PSlink_ids;
	int nids = IntAE_get_nelt(ids);
	int k;

	for (k = 0; k < nids; k++) {
		int id = ids->elts[k];

		match_buf->match_counts->elts[id] = 0;
		if (match_buf->match_starts != NULL)
			IntAE_set_nelt(match_buf->match_starts->elts[id], 0);
		if (match_buf->match_widths != NULL)
			IntAE_set_nelt(match_buf->match_widths->elts[id], 0);
	}
	IntAE_set_nelt(ids, 0);
}
/*
 * Loader callback: register a new record in the index buffers.
 *
 * Appends 'recno + 1' to 'recno_buf', 'offset' to 'offset_buf' and the
 * description line to 'desc_buf'.
 */
static void FASTAINDEX_load_desc_line(FASTAloader *loader,
		int recno, long long int offset,
		const Chars_holder *desc_line)
{
	FASTAINDEX_loaderExt *ext = loader->ext;
	IntAE *recnos = ext->recno_buf;
	LLongAE *offsets = ext->offset_buf;

	IntAE_insert_at(recnos, IntAE_get_nelt(recnos), recno + 1);
	LLongAE_insert_at(offsets, LLongAE_get_nelt(offsets), offset);
	/* desc_line->ptr is passed as a plain C string: this works only
	   because it is nul-terminated! */
	append_string_to_CharAEAE(ext->desc_buf, desc_line->ptr);
}
/*
 * For each element of 'subject', collect the 1-based indices of the
 * elements of 'pattern' that have at least one match in it.
 *
 * Matching is delegated to _match_pattern_XString() with match reporting
 * initialized as "MATCHES_AS_COUNTS"; reported matches are dropped after
 * each pattern/subject pair. 'algorithm' is read as a string from its first
 * element; the other matching arguments are forwarded unchanged.
 *
 * Returns the list built by new_LIST_from_IntAEAE() from one integer buffer
 * per subject element.
 */
static SEXP vwhich_XStringSet_XStringSet(SEXP pattern, SEXP subject,
		SEXP max_mismatch, SEXP min_mismatch,
		SEXP with_indels, SEXP fixed, SEXP algorithm)
{
	XStringSet_holder patterns, subjects;
	Chars_holder pat, subj;
	const char *algo_name;
	IntAEAE *hits;
	IntAE *subj_hits;
	int npat, nsubj, p, s;

	patterns = _hold_XStringSet(pattern);
	npat = _get_length_from_XStringSet_holder(&patterns);
	subjects = _hold_XStringSet(subject);
	nsubj = _get_length_from_XStringSet_holder(&subjects);
	algo_name = CHAR(STRING_ELT(algorithm, 0));

	/* One (emptied) buffer of pattern indices per subject element. */
	hits = new_IntAEAE(nsubj, nsubj);
	for (s = 0; s < nsubj; s++)
		IntAE_set_nelt(hits->elts[s], 0);

	_init_match_reporting("MATCHES_AS_COUNTS", 1);
	for (p = 0; p < npat; p++) {
		pat = _get_elt_from_XStringSet_holder(&patterns, p);
		for (s = 0; s < nsubj; s++) {
			subj = _get_elt_from_XStringSet_holder(&subjects, s);
			_match_pattern_XString(&pat, &subj,
					       max_mismatch, min_mismatch,
					       with_indels, fixed, algo_name);
			if (_get_match_count() != 0) {
				subj_hits = hits->elts[s];
				IntAE_insert_at(subj_hits,
						IntAE_get_nelt(subj_hits),
						p + 1);
			}
			_drop_reported_matches();
		}
	}
	return new_LIST_from_IntAEAE(hits, 0);
}
/*
 * Append the matches stored in 'match_buf2' to 'match_buf1', then flush
 * 'match_buf2'.
 *
 * Does nothing (not even the flush) if either buffer has ms_code
 * MATCHES_AS_NULL. Raises an R error if the two buffers differ in ms_code
 * or in the number of elements of their 'match_counts'.
 *
 * For every pair listed in match_buf2's 'PSlink_ids': the pair is added to
 * match_buf1's 'PSlink_ids' if match_buf1 had no match for it yet, the
 * counts are summed, the starts (if kept) are appended via
 * IntAE_append_shifted_vals() with 'view_offset', and the widths (if kept)
 * are appended as is.
 */
void _MatchBuf_append_and_flush(MatchBuf *match_buf1, MatchBuf *match_buf2,
		int view_offset)
{
	IntAE *dest, *src;
	int nids, k, id;

	if (match_buf1->ms_code == MATCHES_AS_NULL)
		return;
	if (match_buf2->ms_code == MATCHES_AS_NULL)
		return;
	if (match_buf1->ms_code != match_buf2->ms_code
	 || IntAE_get_nelt(match_buf1->match_counts) !=
	    IntAE_get_nelt(match_buf2->match_counts))
		error("Biostrings internal error in "
		      "_MatchBuf_append_and_flush(): "
		      "buffers are incompatible");

	nids = IntAE_get_nelt(match_buf2->PSlink_ids);
	for (k = 0; k < nids; k++) {
		id = match_buf2->PSlink_ids->elts[k];

		/* Pair not seen yet in match_buf1: register it. */
		if (match_buf1->match_counts->elts[id] == 0) {
			dest = match_buf1->PSlink_ids;
			IntAE_insert_at(dest, IntAE_get_nelt(dest), id);
		}
		match_buf1->match_counts->elts[id] +=
			match_buf2->match_counts->elts[id];

		if (match_buf1->match_starts != NULL) {
			dest = match_buf1->match_starts->elts[id];
			src = match_buf2->match_starts->elts[id];
			IntAE_append_shifted_vals(dest, src->elts,
						  IntAE_get_nelt(src),
						  view_offset);
		}
		if (match_buf1->match_widths != NULL) {
			dest = match_buf1->match_widths->elts[id];
			src = match_buf2->match_widths->elts[id];
			IntAE_append(dest, src->elts, IntAE_get_nelt(src));
		}
	}
	_MatchBuf_flush(match_buf2);
}