Example #1
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_attribute_size(SEXP inAttribute)" --
 * 
 * Return the size of the attribute named by inAttribute as an R integer
 * vector of length 1.
 * 
 * inAttribute must be a character vector of length 1; otherwise an R error
 * is raised. The attribute type is not known in advance, so the name is
 * looked up as ATT_POS, then ATT_STRUC, then ATT_ALIGN, and the size is
 * taken from cl_max_cpos(), cl_max_struc() or cl_max_alg() respectively.
 * 
 * A negative size is reported through rcqp_send_error() for ATT_POS and
 * ATT_ALIGN attributes. For an ATT_STRUC attribute a negative size is
 * treated as "no regions" and 0 is returned. If none of the lookups
 * succeeds, cqi_errno is reported through rcqp_error_code().
 * 
 * R_NilValue is returned on the error paths, should the error helpers
 * return at all (NOTE(review): they presumably raise an R error and do not
 * return -- confirm).
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_attribute_size(SEXP inAttribute)
{
	SEXP			result = R_NilValue;
	char *			a;
	Attribute *		attribute;
	int				size = 0;
	
	if (!isString(inAttribute) || length(inAttribute) != 1) error("argument 'attribute' must be a string");
	PROTECT(inAttribute);

	a = (char*)CHAR(STRING_ELT(inAttribute,0));
	
	/* Need to try all possible attribute types */
	if ((attribute = cqi_lookup_attribute(a, ATT_POS)) != NULL) {
		size = cl_max_cpos(attribute);
		if (size < 0) {
			/* Balance the protect stack before reporting, and never fall
			 * through to the final UNPROTECT if the helper returns. */
			UNPROTECT(1);
			rcqp_send_error();
			return R_NilValue;
		}
	} else if ((attribute = cqi_lookup_attribute(a, ATT_STRUC)) != NULL) {
		size = cl_max_struc(attribute);
		if (size < 0) {
			/* An s-attribute without regions is not an error: report 0. */
			size = 0;
		}
	} else if ((attribute = cqi_lookup_attribute(a, ATT_ALIGN)) != NULL) {
		size = cl_max_alg(attribute);
		if (size < 0) {
			UNPROTECT(1);
			rcqp_send_error();
			return R_NilValue;
		}
	} else {
		/* No attribute of any type: report the errno of the last lookup. */
		UNPROTECT(1);
		rcqp_error_code(cqi_errno);
		return R_NilValue;
	}

	result = PROTECT(allocVector(INTSXP, 1));
	INTEGER(result)[0] = size;
	
	/* Exactly two objects are protected here: inAttribute and result. */
	UNPROTECT(2);

	return result;
}
Example #2
0
/**
 * Handles the CQI_CL_ATTRIBUTE_SIZE request.
 *
 * Reads the attribute name with cqi_read_string(), looks it up as
 * ATT_POS, then ATT_STRUC, then ATT_ALIGN, and sends the size of the first
 * match with cqi_data_int(). A negative size triggers send_cl_error() for
 * ATT_POS and ATT_ALIGN attributes but is sent as 0 for ATT_STRUC
 * attributes. If no lookup succeeds, cqi_errno is sent with cqi_command().
 * The name string is released with free() before returning.
 */
void
do_cqi_cl_attribute_size(void)
{
  char *name = cqi_read_string();
  Attribute *att;
  int count;

  if (server_debug)
   Rprintf( "CQi: CQI_CL_ATTRIBUTE_SIZE('%s')\n", name);

  /* the attribute type is unknown, so every type is tried in turn */
  if ((att = cqi_lookup_attribute(name, ATT_POS)) != NULL) {
    count = cl_max_cpos(att);
    if (count >= 0)
      cqi_data_int(count);
    else
      send_cl_error();
  }
  else if ((att = cqi_lookup_attribute(name, ATT_STRUC)) != NULL) {
    count = cl_max_struc(att);
    /* the CL reports an attribute with 0 regions as a data access error,
       but that case is allowed here and answered with 0 */
    cqi_data_int((count < 0) ? 0 : count);
  }
  else if ((att = cqi_lookup_attribute(name, ATT_ALIGN)) != NULL) {
    count = cl_max_alg(att);
    if (count >= 0)
      cqi_data_int(count);
    else
      send_cl_error();
  }
  else {
    /* nothing matched: return errno from the last lookup */
    cqi_command(cqi_errno);
  }

  free(name);
}
Example #3
0
/**
 * Prints a statistical summary of every attribute of a corpus.
 *
 * The attribute list of the corpus is walked from corpus->attributes and
 * one line is printed per attribute via Rprintf(): token and type counts
 * for an ATT_POS attribute, the number of regions for an ATT_STRUC
 * attribute (flagged when cl_struc_values() is true), and the number of
 * alignment blocks for an ATT_ALIGN attribute (flagged when
 * cl_has_extended_alignment() is true). "NO DATA" is printed when the
 * counts indicate missing data; any other attribute type is reported as
 * unknown. A blank line closes the listing.
 *
 * @param corpus  The corpus to analyse.
 */
void 
describecorpus_show_statistics (Corpus *corpus)
{
  Attribute *att = corpus->attributes;

  while (att) {
    if (att->any.type == ATT_POS) {
      int n_tokens, n_types;

      Rprintf("p-ATT %-16s ", att->any.name);
      n_tokens = cl_max_cpos(att);
      n_types = cl_max_id(att);
      /* both counts must be strictly positive to count as data */
      if ((n_tokens <= 0) || (n_types <= 0))
        Rprintf("           NO DATA");
      else
        Rprintf("%10d tokens, %8d types", n_tokens, n_types);
    }
    else if (att->any.type == ATT_STRUC) {
      int n_regions;

      Rprintf("s-ATT %-16s ", att->any.name); 
      n_regions = cl_max_struc(att);
      if (n_regions < 0) {
        Rprintf("           NO DATA");
      }
      else {
        Rprintf("%10d regions", n_regions);
        if (cl_struc_values(att))
          Rprintf(" (with annotations)");
      }
    }
    else if (att->any.type == ATT_ALIGN) {
      int n_blocks;

      Rprintf("a-ATT %-16s ", att->any.name); 
      n_blocks = cl_max_alg(att);
      if (n_blocks < 0) {
        Rprintf("           NO DATA");
      }
      else {
        Rprintf("%10d alignment blocks", n_blocks);
        if (cl_has_extended_alignment(att))
          Rprintf(" (extended)");
      }
    }
    else {
      Rprintf("???   %-16s (unknown attribute type)", att->any.name); 
    }

    Rprintf("\n");
    att = att->any.next;
  }

  Rprintf("\n");
}
Example #4
0
/**
 * Main function for cwb-s-encode.
 *
 * @param argc   Number of command-line arguments.
 * @param argv   Command-line arguments.
 */
int
main(int argc, char **argv)
{
  int input_line;
  int start, end;
  char *annot;
  char buf[CL_MAX_LINE_LENGTH];
  Attribute *att;
  int V_switch, values, S_annotations_dropped;
  int i, N;

  progname = argv[0];
  sencode_parse_options(argc, argv);

  /* -a mode: read existing regions into memory */
  if (add_to_existing) {
    if (corpus == NULL) {
      Rprintf( "Error: You have to specify source corpus (-C <corpus>) for -a switch.\n");
      rcqp_receive_error(1);
    }
    att = cl_new_attribute(corpus, new_satt.name, ATT_STRUC);
    if ((att != NULL) && (cl_max_struc(att) > 0)) {
      V_switch = new_satt.store_values;
      values = cl_struc_values(att);
      if (V_switch && (!values)) {
        Rprintf( "Error: Existing regions of -V attribute have no annotations.\n");
        rcqp_receive_error(1);
      }
      else if ((!V_switch) && values) {
        Rprintf( "Error: Existing regions of -S attributes have annotations.\n");
        rcqp_receive_error(1);
      }
      if (!silent)
        Rprintf("[Loading previous <%s> regions]\n", new_satt.name);

      N = cl_max_struc(att);
      for (i = 0; i < N; i++) {
        cl_struc2cpos(att, i, &start, &end);
        annot = cl_struc2str(att, i);
        SL_insert(start, end, annot);
      }
    }
    else {
      if (!silent)
        Rprintf("[No <%s> regions defined (skipped)]\n", new_satt.name);
    }
  }

  /* loop reading input (stdin or -f <file>) */
  if (in_memory && (!silent))
    Rprintf("[Reading input data]\n");
  input_line = 0;
  S_annotations_dropped = 0;
  while (fgets(buf, CL_MAX_LINE_LENGTH, text_fd)) {
    input_line++;

    /* check for buffer overflow */
    if (strlen(buf) >= (CL_MAX_LINE_LENGTH - 1)) {
      Rprintf( "BUFFER OVERFLOW, input line #%d is too long:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }

    if (! sencode_parse_line(buf, &start, &end, &annot)) {
      Rprintf( "FORMAT ERROR on line #%d:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }
    if (new_satt.store_values && (annot == NULL)) {
      Rprintf( "MISSING ANNOTATION on line #%d:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }
    if ((!new_satt.store_values) && (annot != NULL)) {
      if (! S_annotations_dropped)
        Rprintf( "WARNING: Annotation for -S attribute ignored on line #%d (warning issued only once):\n>> %s", input_line, buf);
      S_annotations_dropped++;
    }
    if ((start <= new_satt.last_cpos) || (end < start)) {
      Rprintf( "RANGE INCONSISTENCY on line #%d:\n>> %s(end of previous region was %d)\n", input_line, buf, new_satt.last_cpos);
      rcqp_receive_error(1);
    }
    if (annot != NULL && set_att != set_none) {
      /* convert set annotation into standard syntax */
      annot = sencode_check_set(annot);
      if (annot == NULL) {
        Rprintf( "SET ANNOTATION SYNTAX ERROR on line #%d:\n>> %s", input_line, buf);
        rcqp_receive_error(1);
      }
    }

    /* debugging output */
    if (debug) {
      Rprintf( "[%d, %d]", start, end);
      if (annot != NULL)
        Rprintf( " <%s>", annot);
      Rprintf( "\n");
    }

    /* in -M mode, store this region in memory; otherwise write it to the disk files */
    if (in_memory)
      SL_insert(start, end, annot);
    else
      sencode_write_region(start, end, annot);

    cl_free(annot);
  }

  /* in -M mode, write data to disk now that we have finished looping across input data */
  if (in_memory) {
    SL item;

    if (!silent)
      Rprintf("[Creating encoded disk file(s)]\n");
    SL_rewind();
    while ((item = SL_next()) != NULL)
      sencode_write_region(item->start, item->end, item->annot);
  }

  /* close files */
  sencode_close_files();

  if (S_annotations_dropped > 0)
    Rprintf( "Warning: %d annotation values dropped for -S attribute '%s'.\n", S_annotations_dropped, new_satt.name);

  rcqp_receive_error(0);
}