Ejemplo n.º 1
0
/**
 * Handler for the CQi command CQI_CL_STRUC2STR.
 *
 * Reads an attribute name and a list of integers (region numbers) from the
 * client, looks the name up as an s-attribute and, on success, answers with
 * a CQI_DATA_STRING_LIST holding the cl_struc2str() value of every requested
 * region. If the lookup fails, the current cqi_errno is sent instead.
 * The reply is flushed and both input buffers are freed in either case.
 */
void
do_cqi_cl_struc2str(void)
{
  char *att_name;
  int *region_ids;
  int n_regions, k;
  Attribute *satt;

  /* request arguments: attribute name, then the list of region numbers */
  att_name = cqi_read_string();
  n_regions = cqi_read_int_list(&region_ids);

  if (server_debug) {
    Rprintf("CQi: CQI_CL_STRUC2STR('%s', [", att_name);
    for (k = 0; k < n_regions; k++)
      Rprintf("%d ", region_ids[k]);
    Rprintf("])\n");
  }

  satt = cqi_lookup_attribute(att_name, ATT_STRUC);
  if (satt != NULL) {
    /* The CQI_DATA_STRING_LIST reply is assembled by hand (type word,
       list size, then one string per entry) so that no temporary list
       has to be allocated. */
    cqi_send_word(CQI_DATA_STRING_LIST);
    cqi_send_int(n_regions);
    for (k = 0; k < n_regions; k++) {
      /* per the original note, cqi_send_string() sends "" for a NULL value */
      cqi_send_string(cl_struc2str(satt, region_ids[k]));
    }
  }
  else {
    /* lookup failed: report the error code to the client */
    cqi_command(cqi_errno);
  }
  cqi_flush();

  /* release the request buffers; free(NULL) is a no-op, so no guard is needed */
  free(region_ids);
  free(att_name);
}
Ejemplo n.º 2
0
/**
 * Prints a starting tag for each s-attribute in printValues[] whose region
 * contains the given corpus position.
 *
 * For every non-NULL entry of printValues[] the region number at 'position'
 * is looked up; if there is one, the attribute name and the (escaped)
 * annotation value of that region are printed to stdout in the format of
 * the current output mode. Entries without a region at 'position' are
 * skipped silently.
 *
 * @param position  The corpus position whose surrounding regions are printed.
 */
void
decode_print_surrounding_s_att_values(int position)
{
  int i;

  for (i = 0; i < printValuesIndex; i++) {
    char *tagname;
    char *sval;
    const char *value;
    int snum;

    if (!printValues[i])
      continue;

    /* if it is a p- or a- attribute, snum is a CL error (less than 0);
     * the same holds if the position is not inside a region, in which
     * case no tag is printed */
    snum = cl_cpos2struc(printValues[i], position);
    if (snum < 0)
      continue;

    sval = decode_string_escape(cl_struc2str(printValues[i], snum));
    /* The annotation may be unavailable (NULL); passing NULL to a %s
     * conversion is undefined behaviour, so print an empty value instead. */
    value = (sval != NULL) ? sval : "";
    tagname = printValues[i]->any.name;

    switch (mode) {
    case ConclineMode:
      printf("<%s %s>: ", tagname, value);
      break;

    case LispMode:
      printf("(VALUE %s \"%s\")\n", tagname, value);
      break;

    case XMLMode:
      printf("<element name=\"%s\" value=\"%s\"/>\n", tagname, value);
      break;

    case EncodeMode:
      /* pretends to be a comment, but has to be stripped before feeding output to encode */
      printf("# %s=%s\n", tagname, value);
      break;

    case StandardMode:
    default:
      printf("<%s %s>\n", tagname, value);
      break;
    }
  }
}
Ejemplo n.º 3
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_struc2str(SEXP inAttribute, SEXP inIds)" --
 * 
 * Function gets value of struc_num'th instance of the specified s-attribute.
 * 
 * inAttribute	character vector of length 1 naming the s-attribute
 * inIds		vector of region numbers; it is coerced to integer, so
 * 				numeric (double) input is accepted as well
 * 
 * Returns a character vector with one element per id. Elements for which
 * cl_struc2str() yields NULL are left at the value allocVector() gave them
 * (the empty string, as the original code relied on). If the attribute
 * cannot be found, rcqp_error_code() is called with cqi_errno; should that
 * call return, R_NilValue is returned.
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_struc2str(SEXP inAttribute, SEXP inIds)
{
	SEXP			result = R_NilValue;
	int				nprotected = 0;		/* number of objects currently PROTECTed here */
	int				len, i;
	int *			ids;
	char 			*a, *str;
	Attribute *		attribute;
	
	if (!isString(inAttribute) || length(inAttribute) != 1) error("argument 'attribute' must be a string");
	PROTECT(inAttribute);
	nprotected++;
	if (!isVector(inIds)) error("argument 'ids' must be a vector of integers");
	/* INTEGER() is only valid on integer storage, so coerce first
	   (integer input is passed through unchanged by coerceVector) */
	PROTECT(inIds = coerceVector(inIds, INTSXP));
	nprotected++;
	
	a = (char*)CHAR(STRING_ELT(inAttribute,0));
	len = length(inIds);
	
	attribute = cqi_lookup_attribute(a, ATT_STRUC);
	if (attribute == NULL) {
		UNPROTECT(nprotected);
		rcqp_error_code(cqi_errno);
		/* only reached if rcqp_error_code() returns; the protect stack is
		   already balanced, so do not fall through to another UNPROTECT */
		return result;
	}
	
	PROTECT(result = allocVector(STRSXP, len));
	nprotected++;
	
	ids = INTEGER(inIds);
	for (i=0; i<len; i++) {
		str = cl_struc2str(attribute, ids[i]);
		/* leave the element untouched if str == NULL (id out of range) */
		if (str != NULL) {
			SET_STRING_ELT(result, i, mkChar(str));
		}
	}
	
	UNPROTECT(nprotected);

	return result;
}
Ejemplo n.º 4
0
/**
 * Prints out the requested attributes for a sequence of tokens
 * (or a single token if end_position == -1).
 *
 * If the -c flag was used (and, thus, the context parameter is not NULL),
 * then the sequence is extended to the entire s-attribute region (in matchlist mode).
 *
 * All output goes to stdout via printf(); its layout depends on the global
 * output mode (Standard, Lisp, Encode, Concline or XML) and on the global
 * printnum flag. The attributes to print are taken from the global
 * print_list[] (print_list_index entries). The globals s_att_regions[],
 * N_sar and sar_sort_index[] are used as scratch space for the s-attribute
 * regions starting or ending at the token currently being printed.
 *
 * @param start_position  First corpus position of the sequence.
 * @param end_position    Last corpus position of the sequence, or a negative
 *                        value (-1) if only a single token is to be printed.
 * @param context         S-attribute whose containing region(s) the printed
 *                        range is expanded to; NULL for no expansion.
 */
void
decode_print_token_sequence(int start_position, int end_position, Attribute *context)
{
  int alg, aligned_start, aligned_end, aligned_start2, aligned_end2,
    rng_start, rng_end, snum;
  int start_context, end_context, dummy;
  int lastposa, i, w;

  /* pointer used for values of p-attributes */
  char *wrd;


  start_context = start_position;
  end_context = (end_position >= 0) ? end_position : start_position;
  /* above ensures that in non-matchlist mode (where ep == -1), we only print one token */

  if (context != NULL) {

    /* expand the start_context end_context numbers to the start
     * and end points of the containing region of the context s-attribute */
    if (!cl_cpos2struc2cpos(context, start_position,
                            &start_context, &end_context)) {
      /* start position is not inside a context region: fall back to the unexpanded range */
      start_context = start_position;
      end_context = (end_position >= 0) ? end_position : start_position;
    }
    else if (end_position >= 0) {
      /* the right edge is taken from the region containing end_position;
       * the start of that region is discarded into dummy */
      if (!cl_cpos2struc2cpos(context, end_position,
                               &dummy, &end_context)) {
        /* end_position >= 0 is already known here, so this always yields end_position */
        end_context = (end_position >= 0) ? end_position : start_position;
      }
    }

    /* indicate that we're showing context */
    switch (mode) {
    case LispMode:
      /* the "(TARGET" list opened here is closed at the very end of the function */
      printf("(TARGET %d\n", start_position);
      if (end_position >= 0)
        printf("(INTERVAL %d %d)\n", start_position, end_position);
      break;
    case EncodeMode:
    case ConclineMode:
      /* nothing here */
      break;
    case XMLMode:
      printf("<context start=\"%d\" end=\"%d\"/>\n", start_context, end_context);
      break;
    case StandardMode:
    default:
      if (end_position >= 0) {
        printf("INTERVAL %d %d\n", start_position, end_position);
      }
      else {
        printf("TARGET %d\n", start_position);
      }
      break;
    }

  } /* endif context != NULL */

  /* some extra information in -L and -H modes */
  if (mode == LispMode && end_position != -1)
    printf("(CONTEXT %d %d)\n", start_context, end_context);
  else if (mode == ConclineMode) {
    if (printnum)
      printf("%8d: ", start_position);
  }

  /* now print the token sequence (including context) with all requested attributes */
  for (w = start_context; w <= end_context; w++) {
    /* set to 1 before the attribute loop, cleared after the first p-attribute
     * value has been printed (used in Encode and Concline modes only) */
    int beg_of_line;

    /* extract s-attribute regions for start and end tags into s_att_regions[] */
    N_sar = 0;                  /* counter and index */
    for (i = 0; i < print_list_index; i++) {
      if (print_list[i]->any.type == ATT_STRUC) {
        /* only regions that start or end exactly at w are recorded */
        if ( ((snum = cl_cpos2struc(print_list[i], w)) >= 0) &&
             (cl_struc2cpos(print_list[i], snum, &rng_start, &rng_end)) &&
             ((w == rng_start) || (w == rng_end)) ) {
          /* NOTE(review): no bounds check on N_sar is visible here; presumably
           * s_att_regions[] is sized for every entry of print_list[] -- confirm */
          s_att_regions[N_sar].name = print_list[i]->any.name;
          s_att_regions[N_sar].start = rng_start;
          s_att_regions[N_sar].end = rng_end;
          if (cl_struc_values(print_list[i]))
            s_att_regions[N_sar].annot = cl_struc2str(print_list[i], snum);
          else
            s_att_regions[N_sar].annot = NULL;
          N_sar++;
        }
      }
    }
    decode_sort_s_att_regions();       /* sort regions to ensure proper nesting of start and end tags */

    /* show corpus positions with -n option */
    if (printnum)
      switch (mode) {
      case LispMode:
        printf("(%d ", w);
        break;
      case EncodeMode:
        printf("%8d\t", w);
        break;
      case ConclineMode:
        /* nothing here (shown at start of line in -H mode) */
        break;
      case XMLMode:
        /* nothing here */
        break;
      case StandardMode:
      default:
        printf("%8d: ", w);
        break;
      }
    else {
      if (mode == LispMode)
        printf("(");            /* entire match is parenthesised list in -L mode */
    }

    /* index of the last p-attribute seen in print_list[]; assigned below
     * but not read anywhere within this function */
    lastposa = -1;

    /* print start tags (s- and a-attributes) with -C,-H,-X */
    if ((mode == EncodeMode) || (mode == ConclineMode) || (mode == XMLMode)) {

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          /* a start tag is printed only at the first token of an alignment bead */
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              && (w == aligned_start)
              ) {
            if (mode == XMLMode) {
              printf("<align type=\"start\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("<%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;

        default:                /* ignore all other attribute types */
          break;
        }
      }

      /* print s-attributes from s_att_regions[] (using sar_sort_index[]) */
      for (i = 0; i < N_sar; i++) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->start == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"start\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            if (region->annot)
              printf(" value=\"%s\"", decode_string_escape(region->annot));
            printf("/>\n");
          }
          else {
            /* NOTE(review): unlike the XML branch, the annotation is printed
             * here without decode_string_escape() -- confirm this is intended */
            printf("<%s%s%s>%c",
                   region->name,
                   region->annot ? " " : "",
                   region->annot ? region->annot : "",
                   (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

    }

    /* now print token with its attribute values (p-attributes only for -C,-H,-X) */
    if (mode == XMLMode) {
      printf("<token");
      if (printnum)
        printf(" cpos=\"%d\"", w);
      printf(">");
    }

    beg_of_line = 1;
    /* Loop printing each attribute for this cpos (w) */
    for (i = 0; i < print_list_index; i++) {

      switch (print_list[i]->any.type) {
      case ATT_POS:
        lastposa = i;
        if ((wrd = decode_string_escape(cl_cpos2str(print_list[i], w))) != NULL) {
          switch (mode) {
          case LispMode:
            printf("(%s \"%s\")", print_list[i]->any.name, wrd);
            break;

          case EncodeMode:
            /* values are TAB-separated, with no separator before the first one */
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("\t%s", wrd);
            break;

          case ConclineMode:
            /* values are '/'-separated, with no separator before the first one */
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("/%s", wrd);
            break;

          case XMLMode:
            printf(" <attr name=\"%s\">%s</attr>", print_list[i]->any.name, wrd);
            break;

          case StandardMode:
          default:
            printf("%s=%s\t", print_list[i]->any.name, wrd);
            break;
          }
        }
        else {
          /* no value could be obtained for this p-attribute at w */
          cl_error("(aborting) cl_cpos2str() failed");
          decode_cleanup(1);
        }
        break;

      case ATT_ALIGN:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              ) {
            if (mode == LispMode) {
              printf("(ALG %d %d %d %d)",
                     aligned_start, aligned_end, aligned_start2, aligned_end2);
            }
            else {
              printf("%d-%d==>%s:%d-%d\t",
                     aligned_start, aligned_end, print_list[i]->any.name, aligned_start2, aligned_end2);
            }
          }
          else if (cl_errno != CDA_OK) {
            cl_error("(aborting) alignment error");
            decode_cleanup(1);
          }
        }
        break;

      case ATT_STRUC:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (cl_cpos2struc2cpos(print_list[i], w, &rng_start, &rng_end)) {
            /* standard and -L mode don't show tag annotations */
            printf(mode == LispMode ? "(STRUC %s %d %d)" : "<%s>:%d-%d\t",
                   print_list[i]->any.name,
                   rng_start, rng_end);
          }
          /* NOTE(review): the message says "(aborting)" but, unlike the two
           * branches above, decode_cleanup() is not called here -- confirm
           * whether continuing is intended */
          else if (cl_errno != CDA_OK)
            cl_error("(aborting) cl_cpos2struc2cpos() failed");
        }
        break;

      case ATT_DYN:
        /* dynamic attributes aren't implemented */
      default:
        break;
      }
    }

    /* print token separator (or end of token in XML mode) */
    switch (mode) {
    case LispMode:
      printf(")\n");
      break;
    case ConclineMode:
      printf(" ");
      break;
    case XMLMode:
      printf(" </token>\n");
      break;
    case EncodeMode:
    case StandardMode:
    default:
      printf("\n");
      break;
    }

    /* now, after printing all the positional attributes, print end tags with -H,-C,-X */
    if (mode == EncodeMode  || mode == ConclineMode || mode == XMLMode) {

      /* print s-attributes from s_att_regions[] (using sar_sort_index[] in reverse order) */
      for (i = N_sar - 1; i >= 0; i--) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->end == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"end\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            printf("/>\n");
          }
          else {
            printf("</%s>%c", region->name, (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          /* an end tag is printed only at the last token of an alignment bead */
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              && (w == aligned_end)
              ) {
            if (mode == XMLMode) {
              printf("<align type=\"end\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("</%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;

        default:
          /* ignore all other attribute types */
          break;
        }
      }

    } /* end of print end tags */

  }  /* end of match range loop: for w from start_context to end_context */

  /* end of match (for matchlist mode in particular) */
  if ((context != NULL) && (mode == LispMode))
    printf(")\n");
  else if (mode == ConclineMode)
    printf("\n");

  return;
}
Ejemplo n.º 5
0
/**
 * Main function for cwb-s-encode.
 *
 * Processing steps:
 *  1. parse the command-line options;
 *  2. in -a mode, load the regions of the existing s-attribute into the
 *     in-memory region list (SL_insert);
 *  3. read region definitions line by line from text_fd, validate each
 *     line, and either store the region in memory (-M mode) or write it
 *     to the disk files straight away;
 *  4. in -M mode, write the collected regions to disk;
 *  5. close the output files and report dropped annotations.
 *
 * Every error is reported with Rprintf() followed by rcqp_receive_error(1)
 * (presumably this call does not return -- confirm against its definition).
 *
 * @param argc   Number of command-line arguments.
 * @param argv   Command-line arguments.
 * @return       0 if control reaches the end of the function.
 */
int
main(int argc, char **argv)
{
  int input_line;
  int start, end;
  char *annot;
  char buf[CL_MAX_LINE_LENGTH];
  Attribute *att;
  int V_switch, values, S_annotations_dropped;
  int i, N;

  progname = argv[0];
  sencode_parse_options(argc, argv);

  /* -a mode: read existing regions into memory */
  if (add_to_existing) {
    if (corpus == NULL) {
      Rprintf( "Error: You have to specify source corpus (-C <corpus>) for -a switch.\n");
      rcqp_receive_error(1);
    }
    att = cl_new_attribute(corpus, new_satt.name, ATT_STRUC);
    if ((att != NULL) && (cl_max_struc(att) > 0)) {
      /* the existing attribute must agree with the -S / -V choice made for the new data */
      V_switch = new_satt.store_values;
      values = cl_struc_values(att);
      if (V_switch && (!values)) {
        Rprintf( "Error: Existing regions of -V attribute have no annotations.\n");
        rcqp_receive_error(1);
      }
      else if ((!V_switch) && values) {
        Rprintf( "Error: Existing regions of -S attributes have annotations.\n");
        rcqp_receive_error(1);
      }
      if (!silent)
        Rprintf("[Loading previous <%s> regions]\n", new_satt.name);

      N = cl_max_struc(att);
      for (i = 0; i < N; i++) {
        /* start and end are only valid if the lookup succeeds, so never
         * insert a region whose boundaries could not be read */
        if (cl_struc2cpos(att, i, &start, &end)) {
          annot = cl_struc2str(att, i);
          SL_insert(start, end, annot);
        }
        else {
          Rprintf( "Error: Can't read existing region #%d of <%s> attribute.\n", i, new_satt.name);
          rcqp_receive_error(1);
        }
      }
    }
    else {
      if (!silent)
        Rprintf("[No <%s> regions defined (skipped)]\n", new_satt.name);
    }
  }

  /* loop reading input (stdin or -f <file>) */
  if (in_memory && (!silent))
    Rprintf("[Reading input data]\n");
  input_line = 0;
  S_annotations_dropped = 0;
  while (fgets(buf, CL_MAX_LINE_LENGTH, text_fd)) {
    input_line++;

    /* check for buffer overflow: a completely filled buffer means the line was truncated */
    if (strlen(buf) >= (CL_MAX_LINE_LENGTH - 1)) {
      Rprintf( "BUFFER OVERFLOW, input line #%d is too long:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }

    if (! sencode_parse_line(buf, &start, &end, &annot)) {
      Rprintf( "FORMAT ERROR on line #%d:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }
    if (new_satt.store_values && (annot == NULL)) {
      Rprintf( "MISSING ANNOTATION on line #%d:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }
    if ((!new_satt.store_values) && (annot != NULL)) {
      /* annotations on a -S attribute are counted, but only the first one triggers a warning */
      if (! S_annotations_dropped)
        Rprintf( "WARNING: Annotation for -S attribute ignored on line #%d (warning issued only once):\n>> %s", input_line, buf);
      S_annotations_dropped++;
    }
    if ((start <= new_satt.last_cpos) || (end < start)) {
      Rprintf( "RANGE INCONSISTENCY on line #%d:\n>> %s(end of previous region was %d)\n", input_line, buf, new_satt.last_cpos);
      rcqp_receive_error(1);
    }
    if (annot != NULL && set_att != set_none) {
      /* convert set annotation into standard syntax */
      annot = sencode_check_set(annot);
      if (annot == NULL) {
        Rprintf( "SET ANNOTATION SYNTAX ERROR on line #%d:\n>> %s", input_line, buf);
        rcqp_receive_error(1);
      }
    }

    /* debugging output */
    if (debug) {
      Rprintf( "[%d, %d]", start, end);
      if (annot != NULL)
        Rprintf( " <%s>", annot);
      Rprintf( "\n");
    }

    /* in -M mode, store this region in memory; otherwise write it to the disk files */
    if (in_memory)
      SL_insert(start, end, annot);
    else
      sencode_write_region(start, end, annot);

    cl_free(annot);
  }

  /* in -M mode, write data to disk now that we have finished looping across input data */
  if (in_memory) {
    SL item;

    if (!silent)
      Rprintf("[Creating encoded disk file(s)]\n");
    SL_rewind();
    while ((item = SL_next()) != NULL)
      sencode_write_region(item->start, item->end, item->annot);
  }

  /* close files */
  sencode_close_files();

  if (S_annotations_dropped > 0)
    Rprintf( "Warning: %d annotation values dropped for -S attribute '%s'.\n", S_annotations_dropped, new_satt.name);

  rcqp_receive_error(0);

  /* only reached if rcqp_receive_error() returns */
  return 0;
}
Ejemplo n.º 6
0
Archivo: groups.c Proyecto: cran/rcqp
/**
 * Validates the arguments of a group command, sets up a Group object
 * and computes the grouping.
 *
 * The source side is optional: it is skipped when source_attr_name is NULL
 * and source_field is NoField. Otherwise, and always for the target side,
 * the attribute name is looked up as a p-attribute first and as an
 * s-attribute second; an s-attribute must carry annotated values. The
 * anchor field of each side must be available in the corpus list (keyword
 * or target anchors defined; match and matchend are always accepted).
 *
 * Any validation failure is reported through cqpmessage() and yields NULL.
 *
 * @param cl                The named query result to group.
 * @param source_field      Anchor of the source side (NoField if unused).
 * @param source_offset     Stored in the Group object as source_offset.
 * @param source_attr_name  Attribute name of the source side (may be NULL).
 * @param target_field      Anchor of the target side.
 * @param target_offset     Stored in the Group object as target_offset.
 * @param target_attr_name  Attribute name of the target side.
 * @param cutoff_freq       Stored in the Group object as cutoff_frequency.
 * @return                  Whatever ComputeGroupExternally() or
 *                          ComputeGroupInternally() returns, or NULL if
 *                          validation fails.
 */
Group *compute_grouping(CorpusList *cl,
                        FieldType source_field,
                        int source_offset,
                        char *source_attr_name,
                        FieldType target_field,
                        int target_offset,
                        char *target_attr_name,
                        int cutoff_freq)
{
  Attribute *src_att = NULL;
  Attribute *tgt_att = NULL;
  int src_struc = 0;
  int tgt_struc = 0;
  char *source_base = NULL;
  char *target_base = NULL;
  Group *grp;

  /* ---- sanity checks on the query result itself ---- */
  if (cl == NULL || cl->corpus == NULL) {
    cqpmessage(Warning, "Grouping:\nCan't access corpus.");
    return NULL;
  }
  if (cl->size == 0 || cl->range == NULL) {
    cqpmessage(Warning, "Corpus %s is empty, no grouping possible",
               cl->name);
    return NULL;
  }

  /* ---- source side (optional) ---- */
  if (source_attr_name != NULL || source_field != NoField) {
    src_att = find_attribute(cl->corpus, source_attr_name, ATT_POS, NULL);
    if (src_att == NULL) {
      /* not a p-attribute: try again as an s-attribute */
      src_struc = 1;
      src_att = find_attribute(cl->corpus, source_attr_name, ATT_STRUC, NULL);
      if (src_att == NULL) {
        cqpmessage(Error, "Can't find attribute ``%s'' for named query %s",
                   source_attr_name, cl->name);
        return NULL;
      }
      if (!cl_struc_values(src_att)) {
        cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s",
                   source_attr_name, cl->name);
        return NULL;
      }
      /* should be beginning of the attribute's lexicon */
      source_base = cl_struc2str(src_att, 0);
      assert(source_base && "Internal error. Please don't use s-attributes in group command.");
    }

    if (source_field == KeywordField) {
      if (cl->keywords == NULL) {
        cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
        return NULL;
      }
    }
    else if (source_field == TargetField) {
      if (cl->targets == NULL) {
        cqpmessage(Error, "No target anchors defined for %s", cl->name);
        return NULL;
      }
    }
    else if (source_field == MatchField || source_field == MatchEndField) {
      assert(cl->range && cl->size > 0);
    }
    else {
      /* NoField and anything unknown */
      cqpmessage(Error, "Illegal second anchor in group command");
      return NULL;
    }
  }

  /* ---- target side (mandatory) ---- */
  tgt_att = find_attribute(cl->corpus, target_attr_name, ATT_POS, NULL);
  if (tgt_att == NULL) {
    /* not a p-attribute: try again as an s-attribute */
    tgt_struc = 1;
    tgt_att = find_attribute(cl->corpus, target_attr_name, ATT_STRUC, NULL);
    if (tgt_att == NULL) {
      cqpmessage(Error, "Can't find attribute ``%s'' for named query %s",
                 target_attr_name, cl->name);
      return NULL;
    }
    if (!cl_struc_values(tgt_att)) {
      cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s",
                 target_attr_name, cl->name);
      return NULL;
    }
    /* should be beginning of the attribute's lexicon */
    target_base = cl_struc2str(tgt_att, 0);
    assert(target_base && "Internal error. Please don't use s-attributes in group command.");
  }

  if (target_field == KeywordField) {
    if (cl->keywords == NULL) {
      cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
      return NULL;
    }
  }
  else if (target_field == TargetField) {
    if (cl->targets == NULL) {
      cqpmessage(Error, "No target anchors defined for %s", cl->name);
      return NULL;
    }
  }
  else if (target_field == MatchField || target_field == MatchEndField) {
    assert(cl->range && cl->size > 0);
  }
  else {
    /* NoField and anything unknown */
    cqpmessage(Error, "Illegal anchor in group command");
    return NULL;
  }

  /* ---- set up Group object ---- */
  grp = (Group *) cl_malloc(sizeof(Group));
  grp->my_corpus = cl;

  grp->source_attribute = src_att;
  grp->source_offset = source_offset;
  grp->source_is_struc = src_struc;
  grp->source_base = source_base;
  grp->source_field = source_field;

  grp->target_attribute = tgt_att;
  grp->target_offset = target_offset;
  grp->target_is_struc = tgt_struc;
  grp->target_base = target_base;
  grp->target_field = target_field;

  grp->nr_cells = 0;
  grp->count_cells = NULL;
  grp->cutoff_frequency = cutoff_freq;

  /* external grouping is used only when enabled, not in insecure mode,
   * and when neither side is an s-attribute */
  if (!UseExternalGrouping || insecure || src_struc || tgt_struc)
    return ComputeGroupInternally(grp);

  /* modifies Group object in place and returns pointer or NULL */
  return ComputeGroupExternally(grp);
}