// take lex return // sort by tok and iterate over the list skipping duplicates SEXP ng_extract_words(SEXP ng_ptr, SEXP ngsize_) { int i, j, k; int len, retlen; char *buf; ngram_t *ng = (ngram_t *) getRptr(ng_ptr); const int ngsize = INTEGER(ngsize_)[0]; wordlist_t *wl; SEXP RET; // Count # words retlen = 2; PROTECT(RET = allocVector(STRSXP, retlen)); // Convert them k = 0; for(i=0; i<ngsize; i++) { wl = ng[i].words; while(wl) { print_word(ng[i].words->word); } if(ng[i].words->word == NULL) { /* SET_STRING_ELT(RET, i, mkChar("<NA>"));*/ goto nextcycle; } len = ng[i].words->word->len; buf = malloc(len * sizeof(buf)); for (j=0; j<len; j++) buf[j] = ng[i].words->word->s[j]; SET_STRING_ELT(RET, k, mkCharLen(buf, len)); free(buf); nextcycle: k++; wl = wl->next; } UNPROTECT(1); return RET; }
SEXP ng_extract_str(SEXP str_ptr, SEXP R_strlen) { SEXP RET; char *str = (char *) getRptr(str_ptr); PROTECT(RET = allocVector(STRSXP, 1)); SET_STRING_ELT(RET, 0, mkCharLen(str, INTEGER(R_strlen)[0])); UNPROTECT(1); return RET; }
SEXP R_deque_to_Rlist(SEXP deque_ptr) { deque_t *dl = (deque_t *) getRptr(deque_ptr); CHECKPTR(dl); const int len = dl->len; list_t *l = dl->start; SEXP Rlist; PROTECT(Rlist = allocVector(VECSXP, len)); for (int i=0; i<len; i++) { append_item_to_Rlist(Rlist, i, l->data); l = l->next; } UNPROTECT(1); return Rlist; }
SEXP ng_extract_ngrams(SEXP ng_ptr, SEXP ngsize_) { int i, j, len; char *buf; ngramlist_t *ngl = (ngramlist_t *) getRptr(ng_ptr); ngram_t *ng = ngl->ng; const int ngsize = INTEGER(ngsize_)[0]; wordlist_t *wl; SEXP RET; PROTECT(RET = allocVector(STRSXP, ngsize)); for(i=0; i<ngsize; i++) { len = 0; wl = ng[i].words; while (wl) { len += wl->word->len; len++; // spaces wl = wl->next; } len--; // apparently mkCharLen handles the NUL terminator for some reason buf = malloc(len * sizeof(*buf)); for (j=0; j<len; j++) buf[j] = ng[i].words->word->s[j]; SET_STRING_ELT(RET, i, mkCharLen(buf, len)); free(buf); } UNPROTECT(1); return RET; }