예제 #1
0
파일: rcqpCommands.c 프로젝트: rforge/rcwb
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_fdist2(SEXP inSubcorpus, SEXP inField1, SEXP inKey1, SEXP inField2, SEXP inKey2, SEXP inCutoff)" --
 * 
 * NB:
 *     inField1 and inKey1 are the "source" parameters in compute_grouping()
 *     inField2 and inKey2 are the "target" parameters in compute_grouping()
 * -> group NP matchend word by target lemma;
 *             <   target  >    <  source  >
 * corresponds to
 * -> cqi_fdist2("DICKENS:NP","target", "lemma", "matchend","word")
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_fdist2(SEXP inSubcorpus, SEXP inField1, SEXP inKey1, SEXP inField2, SEXP inKey2, SEXP inCutoff)
{
	SEXP			result = R_NilValue;
	char 			*subcorpus, *att1, *att2;
	CorpusList *	cl;
	int				i, cutoff, size;
	Group *			table;
	FieldType		fieldtype1 = NoField;
	FieldType		fieldtype2 = NoField;

	PROTECT(inSubcorpus);
	PROTECT(inField1);
	PROTECT(inKey1);
	PROTECT(inField2);
	PROTECT(inKey2);
	PROTECT(inCutoff);
	
	subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
	cl = cqi_find_corpus(subcorpus);
	if (cl == NULL) {
		UNPROTECT(6);
		rcqp_error_code(cqi_errno);
	}
	
	cutoff = asInteger(inCutoff);
	if (cutoff == NA_INTEGER) {
		UNPROTECT(6);
	    error("invalid 'cutoff' value (too large or NA)");
	}
	
	fieldtype1 = rcqp_get_field_type(inField1);
	fieldtype2 = rcqp_get_field_type(inField2);
	att1 = (char*)CHAR(STRING_ELT(inKey1,0));
	att2 = (char*)CHAR(STRING_ELT(inKey2,0));
	
    /* compute_grouping() returns tokens with f > cutoff, */
    /* but CQi specifies f >= cutoff */
    cutoff = (cutoff > 0) ? cutoff - 1 : 0;

	table = compute_grouping(cl, fieldtype1, 0, att1, fieldtype2, 0, att2, cutoff);
    if (table == NULL) {
      rcqp_error_code(CQI_CQP_ERROR_GENERAL);
    } else {
      size = table->nr_cells;
	  result = PROTECT(allocMatrix(INTSXP, size, 3));

	  for (i=0; i < size; i++) {
		  INTEGER(result)[i] = table->count_cells[i].s;
		  INTEGER(result)[i+size] = table->count_cells[i].t;
		  INTEGER(result)[i+(size*2)] = table->count_cells[i].freq;
      }

      free_group(&table);
    }

	UNPROTECT(7);
	
	return result;
}
예제 #2
0
파일: cqpserver.c 프로젝트: cran/rcqp
void
do_cqi_cqp_fdist_2(void)
{
  char *subcorpus;
  CorpusList *cl;
  int cutoff;
  cqi_byte field1, field2;
  char *att1, *att2;
  Group *table;
  int i, size;
  char *fieldname1, *fieldname2;
  FieldType fieldtype1 = NoField, fieldtype2 = NoField;
  int fields_ok = 1;            /* (both) fields valid? */

  subcorpus = cqi_read_string();
  cutoff = cqi_read_int();
  field1 = cqi_read_byte();
  att1 = cqi_read_string();
  field2 = cqi_read_byte();
  att2 = cqi_read_string();

  /* not exactly the fastest way to do it ... */
  fieldname1 = cqi_field_name(field1);
  if (fieldname1 == NULL) {
    fieldname1 = "<invalid field>";
    fields_ok = 0;
  }
  else {
    fieldtype1 = field_name_to_type(fieldname1);
  }
  fieldname2 = cqi_field_name(field2);
  if (fieldname2 == NULL) {
    fieldname2 = "<invalid field>";
    fields_ok = 0;
  }
  else {
    fieldtype2 = field_name_to_type(fieldname2);
  }
  if (server_debug) 
   Rprintf( "CQi: CQI_CQP_FDIST_2('%s', %d, %s, %s, %s, %s)\n", 
            subcorpus, cutoff, fieldname1, att1, fieldname2, att2);
  
  cl = cqi_find_corpus(subcorpus);
  if (cl == NULL) 
    cqi_command(cqi_errno);
  else if (!fields_ok)
    cqi_command(CQI_CQP_ERROR_INVALID_FIELD);
  else {
    /* compute_grouping() returns tokens with f > cutoff, but CQi specifies f >= cutoff */
    cutoff = (cutoff > 0) ? cutoff - 1 : 0;
    table = compute_grouping(cl, fieldtype1, 0, att1, fieldtype2, 0, att2, cutoff);
    if (table == NULL) {
      cqi_command(CQI_CQP_ERROR_GENERAL);
    }
    else {
      size = table->nr_cells;
      cqi_send_word(CQI_DATA_INT_TABLE);        /* return table with 3 columns & <size> rows */
      cqi_send_int(size);
      cqi_send_int(3);
      for (i=0; i < size; i++) {
        cqi_send_int(table->count_cells[i].s);
        cqi_send_int(table->count_cells[i].t);
        cqi_send_int(table->count_cells[i].freq);
      }
      cqi_flush();
      free_group(&table);
    }
  }

  cl_free(subcorpus);
  cl_free(att1);
  cl_free(att2);
}