Beispiel #1
0
void
do_cqi_cqp_list_subcorpora(void)
{
  char *corpus;
  CorpusList *cl, *mother;
  int n = 0;
  
  corpus = cqi_read_string();
  if (server_debug)
   Rprintf( "CQi: CQI_CQP_LIST_SUBCORPORA(%s)\n", corpus);
  mother = cqi_find_corpus(corpus);
  if (!check_corpus_name(corpus) || mother == NULL) 
    cqi_command(cqi_errno);
  else {

    /* ugly, but it's easiest ... first count corpora, then return names one by one */
    for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
      if (cl->type == SUB && cl->corpus == mother->corpus)
        n++;
    }
    cqi_send_word(CQI_DATA_STRING_LIST);
    cqi_send_int(n);
    for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
      if (cl->type == SUB && cl->corpus == mother->corpus)
        cqi_send_string(cl->name);
    }
    cqi_flush();

  }
  free(corpus);
}
Beispiel #2
0
void
do_cqi_cqp_drop_subcorpus(void)
{
  char *subcorpus;
  CorpusList *cl;
  char *c, *sc;

  subcorpus = cqi_read_string();
  if (server_debug)
   Rprintf( "CQi: CQI_CQP_DROP_SUBCORPUS('%s')\n", subcorpus);

  /* make sure it is a subcorpus, not a root corpus */
  if (!split_subcorpus_spec(subcorpus, &c, &sc))
    cqi_command(cqi_errno);
  else if (sc == NULL) {
    free(c);
    cqi_command(CQI_ERROR_SYNTAX_ERROR);
  }
  else {
    free(c); free(sc);
    cl = cqi_find_corpus(subcorpus);
    if (cl == NULL) 
      cqi_command(cqi_errno);
    else {
      dropcorpus(cl);
      cqi_command(CQI_STATUS_OK);
    }
  }

  free(subcorpus);
}
Beispiel #3
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_drop_subcorpus(SEXP inSubcorpus)" --
 * 
 * 
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_drop_subcorpus(SEXP inSubcorpus)
{
	SEXP			result = R_NilValue;
	char *			subcorpus;
	char 			*c, *sc;
	CorpusList *	cl;
	
	PROTECT(inSubcorpus);
	
	subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
	
	/* Make sure it is a subcorpus, not a root corpus */
	if (!split_subcorpus_spec(subcorpus, &c, &sc)) {
		UNPROTECT(1);
		rcqp_error_code(cqi_errno);
	} else if (sc == NULL) {
		free(c);
		UNPROTECT(1);
		error("can't drop a root corpus.");
	} else {
		free(c); free(sc);
		cl = cqi_find_corpus(subcorpus);
		if (cl == NULL) {
			UNPROTECT(1);
			rcqp_error_code(cqi_errno);
		} else {
			dropcorpus(cl);
		}
	}
	
	UNPROTECT(1);
	
	return result;
}
Beispiel #4
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_fdist2(SEXP inSubcorpus, SEXP inField1, SEXP inKey1, SEXP inField2, SEXP inKey2, SEXP inCutoff)" --
 * 
 * NB:
 *     inField1 and inKey1 are the "source" parameters in compute_grouping()
 *     inField2 and inKey2 are the "target" parameters in compute_grouping()
 * -> group NP matchend word by target lemma;
 *             <   target  >    <  source  >
 * corresponds to
 * -> cqi_fdist2("DICKENS:NP","target", "lemma", "matchend","word")
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_fdist2(SEXP inSubcorpus, SEXP inField1, SEXP inKey1, SEXP inField2, SEXP inKey2, SEXP inCutoff)
{
	SEXP			result = R_NilValue;
	char 			*subcorpus, *att1, *att2;
	CorpusList *	cl;
	int				i, cutoff, size;
	Group *			table;
	FieldType		fieldtype1 = NoField;
	FieldType		fieldtype2 = NoField;

	PROTECT(inSubcorpus);
	PROTECT(inField1);
	PROTECT(inKey1);
	PROTECT(inField2);
	PROTECT(inKey2);
	PROTECT(inCutoff);
	
	subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
	cl = cqi_find_corpus(subcorpus);
	if (cl == NULL) {
		UNPROTECT(6);
		rcqp_error_code(cqi_errno);
	}
	
	cutoff = asInteger(inCutoff);
	if (cutoff == NA_INTEGER) {
		UNPROTECT(6);
	    error("invalid 'cutoff' value (too large or NA)");
	}
	
	fieldtype1 = rcqp_get_field_type(inField1);
	fieldtype2 = rcqp_get_field_type(inField2);
	att1 = (char*)CHAR(STRING_ELT(inKey1,0));
	att2 = (char*)CHAR(STRING_ELT(inKey2,0));
	
    /* compute_grouping() returns tokens with f > cutoff, */
    /* but CQi specifies f >= cutoff */
    cutoff = (cutoff > 0) ? cutoff - 1 : 0;

	table = compute_grouping(cl, fieldtype1, 0, att1, fieldtype2, 0, att2, cutoff);
    if (table == NULL) {
      rcqp_error_code(CQI_CQP_ERROR_GENERAL);
    } else {
      size = table->nr_cells;
	  result = PROTECT(allocMatrix(INTSXP, size, 3));

	  for (i=0; i < size; i++) {
		  INTEGER(result)[i] = table->count_cells[i].s;
		  INTEGER(result)[i+size] = table->count_cells[i].t;
		  INTEGER(result)[i+(size*2)] = table->count_cells[i].freq;
      }

      free_group(&table);
    }

	UNPROTECT(7);
	
	return result;
}
Beispiel #5
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_query(SEXP inMother, SEXP inChild, SEXP inQuery)" --
 * 
 *  
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_query(SEXP inMother, SEXP inChild, SEXP inQuery)
{
	SEXP			result = R_NilValue;
	char			*child, *mother, *query, *c, *sc;
	
	if (!isString(inMother) || length(inMother) != 1) error("invalid corpus name");
	if (!isString(inChild) || length(inChild) != 1) error("invalid subcorpus name");
	if (!isString(inQuery) || length(inQuery) != 1) error("invalid query name");
	
	PROTECT(inMother);
	PROTECT(inChild);
	PROTECT(inQuery);

	mother = (char*)CHAR(STRING_ELT(inMother,0));
	child = (char*)CHAR(STRING_ELT(inChild,0));
	query = (char*)CHAR(STRING_ELT(inQuery,0));
	
	if (!split_subcorpus_spec(mother, &c, &sc)) {
		rcqp_error_code(cqi_errno);
	} else {
		char *cqp_query;
		int len = strlen(child) + strlen(query) + 10;
		
		cqp_query = (char *) cl_malloc(len);
		if (!check_subcorpus_name(child) || !cqi_activate_corpus(mother)) {
			rcqp_error_code(cqi_errno);
		} else {
			query_lock = floor(1e9 * cl_runif()) + 1; /* activate query lock mode with random key */
			
			if (rcqp_query_has_semicolon(query)) {
				sprintf(cqp_query, "%s = %s", child, query);
			} else {
				sprintf(cqp_query, "%s = %s;", child, query);
			}
			
			if (!cqp_parse_string(cqp_query)) {
				rcqp_error_code(CQI_CQP_ERROR_GENERAL); /* should be changed to detailed error messages */
			} else {
				char *			full_child;
				CorpusList *	childcl;
				
				full_child = combine_subcorpus_spec(c, child); /* c is the 'physical' part of the mother corpus */
				childcl = cqi_find_corpus(full_child);
				if ((childcl) == NULL) {
					rcqp_error_code(CQI_CQP_ERROR_GENERAL);
				} 
				free(full_child);
			}			
			query_lock = 0;           /* deactivate query lock mode */
		}
		free(cqp_query);
	}
	free(c);
	free(sc);
	
	UNPROTECT(3);
	
	return result;
}
Beispiel #6
0
void 
do_cqi_cqp_query(void)
{
  char *child, *mother, *query, *c, *sc;
  
  mother = cqi_read_string();
  child = cqi_read_string();
  query = cqi_read_string();
  if (server_debug) 
   Rprintf( "CQi: CQI_CQP_QUERY('%s', '%s', '%s')\n", mother, child, query);
  if (!split_subcorpus_spec(mother, &c, &sc)) {
    cqi_command(cqi_errno);
  }
  else {
    char *cqp_query;
    int len = strlen(child) + strlen(query) + 10;
    
    cqp_query = (char *) cl_malloc(len);
    if (!check_subcorpus_name(child) || !cqi_activate_corpus(mother)) {
      cqi_command(cqi_errno);
    }
    else {
      query_lock = floor(1e9 * cl_runif()) + 1; /* activate query lock mode with random key */

     Rprintf("CQPSERVER: query_lock = %d\n", query_lock);
      if (query_has_semicolon(query))
        sprintf(cqp_query, "%s = %s", child, query);
      else
        sprintf(cqp_query, "%s = %s;", child, query);
      if (!cqp_parse_string(cqp_query))
        cqi_command(CQI_CQP_ERROR_GENERAL); /* should be changed to detailed error messages */
      else {
        char *full_child;
        CorpusList *childcl;
        
        full_child = combine_subcorpus_spec(c, child); /* c is the 'physical' part of the mother corpus */
        childcl = cqi_find_corpus(full_child);
        if ((childcl) == NULL)
          cqi_command(CQI_CQP_ERROR_GENERAL);
        else {
          if (server_log) {
           Rprintf("'%s' ran the following query on %s\n", user, mother);
           Rprintf("\t%s\n", cqp_query);
           Rprintf("and got %d matches.\n", childcl->size);
          }
          cqi_command(CQI_STATUS_OK);

        }
        free(full_child);
      }

      query_lock = 0;           /* deactivate query lock mode */
    }
    free(cqp_query);
  }
  free(c);
  free(sc);
}
Beispiel #7
0
void
do_cqi_cqp_subcorpus_has_field(void)
{
  char *subcorpus;
  CorpusList *cl;
  cqi_byte field;
  char *fieldname;
  int field_ok = 1;             /* field valid? */

  subcorpus = cqi_read_string();
  field = cqi_read_byte();

  fieldname = cqi_field_name(field);
  if (fieldname == NULL) {
    fieldname = "<invalid field>";
    field_ok = 0;
  }
  if (server_debug) 
   Rprintf( "CQi: CQI_CQP_SUBCORPUS_HAS_FIELD('%s', %s)\n", 
            subcorpus, fieldname);

  cl = cqi_find_corpus(subcorpus);
  if (cl == NULL) 
    cqi_command(cqi_errno);
  else if (!field_ok)
    cqi_command(CQI_CQP_ERROR_INVALID_FIELD);
  else {
    switch (field) {
    case CQI_CONST_FIELD_MATCH:
      cqi_data_bool(CQI_CONST_YES);
      break;
    case CQI_CONST_FIELD_MATCHEND:
      cqi_data_bool(CQI_CONST_YES);
      break;
    case CQI_CONST_FIELD_TARGET:
      if (cl->targets == NULL) 
        cqi_data_bool(CQI_CONST_NO);
      else 
        cqi_data_bool(CQI_CONST_YES);
      break;
    case CQI_CONST_FIELD_KEYWORD:
      if (cl->keywords == NULL) 
        cqi_data_bool(CQI_CONST_NO);
      else 
        cqi_data_bool(CQI_CONST_YES);
      break;
    default:
      cqiserver_internal_error("do_cqi_cqp_subcorpus_has_field", "Can't identify requested field.");
    }
    cqi_flush();
  }

  free(subcorpus);
}
Beispiel #8
0
void
do_cqi_cqp_subcorpus_size(void)
{
  char *subcorpus;
  CorpusList *cl;

  subcorpus = cqi_read_string();
  if (server_debug)
   Rprintf( "CQi: CQI_CQP_SUBCORPUS_SIZE('%s')\n", subcorpus);
  cl = cqi_find_corpus(subcorpus);
  if (cl == NULL) 
    cqi_command(cqi_errno);
  else
    cqi_data_int(cl->size);
  
  free(subcorpus);
}
Beispiel #9
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_subcorpus_size(SEXP inSubcorpus)" --
 * 
 * 
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_subcorpus_size(SEXP inSubcorpus)
{
	SEXP			result = R_NilValue;
	char *			subcorpus;
	CorpusList *	cl;
	
	PROTECT(inSubcorpus);

	subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
	cl = cqi_find_corpus(subcorpus);
	
	if (cl == NULL) {
		UNPROTECT(1);
		rcqp_error_code(cqi_errno);
	} else {
		result = PROTECT(allocVector(INTSXP, 1));
		INTEGER(result)[0] = cl->size;
	}

	UNPROTECT(2);

	return result;
}
Beispiel #10
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_list_subcorpora(SEXP inCorpus)" --
 * 
 * 
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_list_subcorpora(SEXP inCorpus)
{
	SEXP			result = R_NilValue;
	char *			corpus;
	CorpusList		*cl, *mother;
	int				i = 0, n = 0;
	
	PROTECT(inCorpus);

	corpus = (char*)CHAR(STRING_ELT(inCorpus,0));

	mother = cqi_find_corpus(corpus);
	if (!check_corpus_name(corpus) || mother == NULL) {
		UNPROTECT(1);
		rcqp_error_code(cqi_errno);
	} else {
		/* First count subcorpora */
		for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
			if (cl->type == SUB && cl->corpus == mother->corpus) n++;
		}
		
		result = PROTECT(allocVector(STRSXP, n));

		/* Then build list of names */
		for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
			if (cl->type == SUB && cl->corpus == mother->corpus) {
				SET_STRING_ELT(result, i, mkChar(cl->name));
				i++;
			}
		}
	}
	
	UNPROTECT(2);
	
	return result;
}
Beispiel #11
0
void
do_cqi_cqp_fdist_2(void)
{
  char *subcorpus;
  CorpusList *cl;
  int cutoff;
  cqi_byte field1, field2;
  char *att1, *att2;
  Group *table;
  int i, size;
  char *fieldname1, *fieldname2;
  FieldType fieldtype1 = NoField, fieldtype2 = NoField;
  int fields_ok = 1;            /* (both) fields valid? */

  subcorpus = cqi_read_string();
  cutoff = cqi_read_int();
  field1 = cqi_read_byte();
  att1 = cqi_read_string();
  field2 = cqi_read_byte();
  att2 = cqi_read_string();

  /* not exactly the fastest way to do it ... */
  fieldname1 = cqi_field_name(field1);
  if (fieldname1 == NULL) {
    fieldname1 = "<invalid field>";
    fields_ok = 0;
  }
  else {
    fieldtype1 = field_name_to_type(fieldname1);
  }
  fieldname2 = cqi_field_name(field2);
  if (fieldname2 == NULL) {
    fieldname2 = "<invalid field>";
    fields_ok = 0;
  }
  else {
    fieldtype2 = field_name_to_type(fieldname2);
  }
  if (server_debug) 
   Rprintf( "CQi: CQI_CQP_FDIST_2('%s', %d, %s, %s, %s, %s)\n", 
            subcorpus, cutoff, fieldname1, att1, fieldname2, att2);
  
  cl = cqi_find_corpus(subcorpus);
  if (cl == NULL) 
    cqi_command(cqi_errno);
  else if (!fields_ok)
    cqi_command(CQI_CQP_ERROR_INVALID_FIELD);
  else {
    /* compute_grouping() returns tokens with f > cutoff, but CQi specifies f >= cutoff */
    cutoff = (cutoff > 0) ? cutoff - 1 : 0;
    table = compute_grouping(cl, fieldtype1, 0, att1, fieldtype2, 0, att2, cutoff);
    if (table == NULL) {
      cqi_command(CQI_CQP_ERROR_GENERAL);
    }
    else {
      size = table->nr_cells;
      cqi_send_word(CQI_DATA_INT_TABLE);        /* return table with 3 columns & <size> rows */
      cqi_send_int(size);
      cqi_send_int(3);
      for (i=0; i < size; i++) {
        cqi_send_int(table->count_cells[i].s);
        cqi_send_int(table->count_cells[i].t);
        cqi_send_int(table->count_cells[i].freq);
      }
      cqi_flush();
      free_group(&table);
    }
  }

  cl_free(subcorpus);
  cl_free(att1);
  cl_free(att2);
}
Beispiel #12
0
void
do_cqi_cqp_dump_subcorpus(void)
{
  char *subcorpus;
  CorpusList *cl;
  cqi_byte field;
  int i, first, last, size;
  char *fieldname;
  int field_ok = 1;             /* field valid? */

  subcorpus = cqi_read_string();
  field = cqi_read_byte();
  first = cqi_read_int();
  last = cqi_read_int();

  fieldname = cqi_field_name(field);
  if (fieldname == NULL) {
    fieldname = "<invalid field>";
    field_ok = 0;
  }
  if (server_debug) 
   Rprintf( "CQi: CQI_CQP_DUMP_SUBCORPUS('%s', %s, %d, %d)\n", 
            subcorpus, fieldname, first, last);

  cl = cqi_find_corpus(subcorpus);
  if (cl == NULL) 
    cqi_command(cqi_errno);
  else if (!field_ok)
    cqi_command(CQI_CQP_ERROR_INVALID_FIELD);
  else if ((last < first) || (first < 0) || (last >= cl->size)) 
    cqi_command(CQI_CQP_ERROR_OUT_OF_RANGE);
  else {
      cqi_send_word(CQI_DATA_INT_LIST); /* assemble by hand, so we don't have to allocate a temporary list */
      size = last - first + 1;
      cqi_send_int(size);
      switch (field) {
      case CQI_CONST_FIELD_MATCH:
        for (i=first; i<=last; i++)
          cqi_send_int(cl->range[i].start);
        break;
      case CQI_CONST_FIELD_MATCHEND:
        for (i=first; i<=last; i++)
          cqi_send_int(cl->range[i].end);
        break;
      case CQI_CONST_FIELD_TARGET:
        if (cl->targets == NULL) 
          do_cqi_send_minus_one_list(size);
        else 
          for (i=first; i<=last; i++)
            cqi_send_int(cl->targets[i]);
        break;
      case CQI_CONST_FIELD_KEYWORD:
        if (cl->keywords == NULL) 
          do_cqi_send_minus_one_list(size);
        else 
          for (i=first; i<=last; i++)
            cqi_send_int(cl->keywords[i]);
        break;
      default:
        cqiserver_internal_error("do_cqi_cqp_dump_subcorpus", "No handler for requested field.");
      }
      cqi_flush();
  }

  free(subcorpus);
}
Beispiel #13
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_dump_subcorpus(SEXP inSubcorpus, SEXP inField, SEXP inFirst, SEXP inLast)" --
 * 
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_dump_subcorpus(SEXP inSubcorpus, SEXP inFirst, SEXP inLast)
{
	SEXP			result = R_NilValue;
	char *			subcorpus;
	CorpusList *	cl;
	int				i, first, last, nrows;
	
	if (!isString(inSubcorpus) || length(inSubcorpus) != 1) error("invalid subcorpus name");
	
	PROTECT(inSubcorpus);
	PROTECT(inFirst);
	PROTECT(inLast);
	
	first = asInteger(inFirst);
	if (first == NA_INTEGER) {
		UNPROTECT(3);
	    error("invalid 'first' value (too large or NA)");
	}
	last = asInteger(inLast);
	if (last == NA_INTEGER) {
		UNPROTECT(3);
	    error("invalid 'last' value (too large or NA)");
	}
	subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
	
	cl = cqi_find_corpus(subcorpus);
	if (cl == NULL) {
		UNPROTECT(3);
		rcqp_error_code(cqi_errno);
	}
	
	if ((last < first) || (first < 0) || (last >= cl->size)) {
		error("indices out of range\n");
	}

	nrows = last - first + 1;
	result = PROTECT(allocMatrix(INTSXP, nrows, 4));
	
	for (i = 0; i< nrows; i++) {
		/* 'match' column */
		INTEGER(result)[i] = cl->range[i+first].start;
		
		/* 'matchend' column */
		INTEGER(result)[i+nrows] = cl->range[i+first].end;
		
		/* 'target' column */
		if (cl->targets == NULL) {
			INTEGER(result)[i+2*nrows] = -1;
		} else {
			INTEGER(result)[i+2*nrows] = cl->targets[i+first];
		} 
		
		/* 'keyword' column */
		if (cl->keywords == NULL) {
			INTEGER(result)[i+3*nrows] = -1;
		} else {
			INTEGER(result)[i+3*nrows] = cl->keywords[i+first];
		} 
	}
	
	UNPROTECT(4);
	
	return result;
}