/* * ------------------------------------------------------------------------ * * "rcqpCmd_fdist2(SEXP inSubcorpus, SEXP inField1, SEXP inKey1, SEXP inField2, SEXP inKey2, SEXP inCutoff)" -- * * NB: * inField1 and inKey1 are the "source" parameters in compute_grouping() * inField2 and inKey2 are the "target" parameters in compute_grouping() * -> group NP matchend word by target lemma; * < target > < source > * corresponds to * -> cqi_fdist2("DICKENS:NP","target", "lemma", "matchend","word") * * ------------------------------------------------------------------------ */ SEXP rcqpCmd_fdist2(SEXP inSubcorpus, SEXP inField1, SEXP inKey1, SEXP inField2, SEXP inKey2, SEXP inCutoff) { SEXP result = R_NilValue; char *subcorpus, *att1, *att2; CorpusList * cl; int i, cutoff, size; Group * table; FieldType fieldtype1 = NoField; FieldType fieldtype2 = NoField; PROTECT(inSubcorpus); PROTECT(inField1); PROTECT(inKey1); PROTECT(inField2); PROTECT(inKey2); PROTECT(inCutoff); subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0)); cl = cqi_find_corpus(subcorpus); if (cl == NULL) { UNPROTECT(6); rcqp_error_code(cqi_errno); } cutoff = asInteger(inCutoff); if (cutoff == NA_INTEGER) { UNPROTECT(6); error("invalid 'cutoff' value (too large or NA)"); } fieldtype1 = rcqp_get_field_type(inField1); fieldtype2 = rcqp_get_field_type(inField2); att1 = (char*)CHAR(STRING_ELT(inKey1,0)); att2 = (char*)CHAR(STRING_ELT(inKey2,0)); /* compute_grouping() returns tokens with f > cutoff, */ /* but CQi specifies f >= cutoff */ cutoff = (cutoff > 0) ? cutoff - 1 : 0; table = compute_grouping(cl, fieldtype1, 0, att1, fieldtype2, 0, att2, cutoff); if (table == NULL) { rcqp_error_code(CQI_CQP_ERROR_GENERAL); } else { size = table->nr_cells; result = PROTECT(allocMatrix(INTSXP, size, 3)); for (i=0; i < size; i++) { INTEGER(result)[i] = table->count_cells[i].s; INTEGER(result)[i+size] = table->count_cells[i].t; INTEGER(result)[i+(size*2)] = table->count_cells[i].freq; } free_group(&table); } UNPROTECT(7); return result; }
void do_cqi_cqp_fdist_2(void) { char *subcorpus; CorpusList *cl; int cutoff; cqi_byte field1, field2; char *att1, *att2; Group *table; int i, size; char *fieldname1, *fieldname2; FieldType fieldtype1 = NoField, fieldtype2 = NoField; int fields_ok = 1; /* (both) fields valid? */ subcorpus = cqi_read_string(); cutoff = cqi_read_int(); field1 = cqi_read_byte(); att1 = cqi_read_string(); field2 = cqi_read_byte(); att2 = cqi_read_string(); /* not exactly the fastest way to do it ... */ fieldname1 = cqi_field_name(field1); if (fieldname1 == NULL) { fieldname1 = "<invalid field>"; fields_ok = 0; } else { fieldtype1 = field_name_to_type(fieldname1); } fieldname2 = cqi_field_name(field2); if (fieldname2 == NULL) { fieldname2 = "<invalid field>"; fields_ok = 0; } else { fieldtype2 = field_name_to_type(fieldname2); } if (server_debug) Rprintf( "CQi: CQI_CQP_FDIST_2('%s', %d, %s, %s, %s, %s)\n", subcorpus, cutoff, fieldname1, att1, fieldname2, att2); cl = cqi_find_corpus(subcorpus); if (cl == NULL) cqi_command(cqi_errno); else if (!fields_ok) cqi_command(CQI_CQP_ERROR_INVALID_FIELD); else { /* compute_grouping() returns tokens with f > cutoff, but CQi specifies f >= cutoff */ cutoff = (cutoff > 0) ? cutoff - 1 : 0; table = compute_grouping(cl, fieldtype1, 0, att1, fieldtype2, 0, att2, cutoff); if (table == NULL) { cqi_command(CQI_CQP_ERROR_GENERAL); } else { size = table->nr_cells; cqi_send_word(CQI_DATA_INT_TABLE); /* return table with 3 columns & <size> rows */ cqi_send_int(size); cqi_send_int(3); for (i=0; i < size; i++) { cqi_send_int(table->count_cells[i].s); cqi_send_int(table->count_cells[i].t); cqi_send_int(table->count_cells[i].freq); } cqi_flush(); free_group(&table); } } cl_free(subcorpus); cl_free(att1); cl_free(att2); }