Пример #1
0
/** attribute (selected/unselected) print helper routine  */
void 
PrintAttributes(FILE *fd, char *header, AttributeList *al, int show_if_annot)
{
  int line = 0, i;
  AttributeInfo *current;

  if (al && al->list) {
    for (current = al->list; current; current = current->next) {
      if (line++ == 0) 
        fprintf(fd, "%s", header);
      else 
        for (i = strlen(header); i; i--)
          fprintf(fd, " ");
      if (current->status)
        fprintf(fd, "  * ");
      else
        fprintf(fd, "    ");
      /* structural attributes only;
       * note we DEPEND on show_if_annot only being true iff al is a list of struc attributes,
       * otherwise calling cl_struc_values will cause a cl_error */
      if (!show_if_annot || !cl_struc_values(current->attribute))
        fprintf(fd, "%s\n", current->attribute->any.name);
      else
        fprintf(fd, "%-20s [A]\n", current->attribute->any.name);
    }
  }
  else
    fprintf(fd, "%s    <none>\n", header);
}
Пример #2
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_structural_attribute_has_values(SEXP inAttribute)" --
 * 
 * 
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_structural_attribute_has_values(SEXP inAttribute)
{
	SEXP			result = R_NilValue;
	char *			a;
	Attribute *		attribute;
	
	
	/* rcqp_initialize(); */
	if (!isString(inAttribute) || length(inAttribute) != 1) error("argument 'attribute' must be a string");
	PROTECT(inAttribute);

	a = (char*)CHAR(STRING_ELT(inAttribute,0));
	
	attribute = cqi_lookup_attribute(a, ATT_STRUC);
	if (attribute != NULL) {
		result = PROTECT(allocVector(LGLSXP, 1));
		LOGICAL(result)[0] = (cl_struc_values(attribute) != 0);
	} else {
		rcqp_error_code(cqi_errno);
	}

	UNPROTECT(2);

	return result;
}
Пример #3
0
/** attribute print helper routine (non pretty-printing mode)
 *  ( TODO desperately needs a better name ) */
void 
PrintAttributesSimple(FILE *fd, char *type, AttributeList *al, int show_if_annot)
{
  AttributeInfo *ai;

  if (al && al->list) {
    for (ai = al->list; ai; ai = ai->next) {
      fprintf(fd, "%s\t%s", type, ai->attribute->any.name);
      if (show_if_annot) {
        fprintf(fd, "\t%s", (cl_struc_values(ai->attribute)) ? "-V" : "");
      }
      fprintf(fd, "\n");
    }
  }
}
Пример #4
0
/**
 * Prints statistical information about a corpus to STDOUT.
 *
 * Each corpus attribute gets info printed about it:
 * tokens and types for a P-attribute, number of instances
 * of regions for an S-attribute, number of alignment
 * blocks for an A-attribute.
 *
 * @param corpus  The corpus to analyse.
 */
void 
describecorpus_show_statistics (Corpus *corpus)
{
  Attribute *a;
  int tokens, types, regions, blocks;

  for (a = corpus->attributes; a; a = a->any.next) {
    switch(a->any.type) {
    case ATT_POS:
      Rprintf("p-ATT %-16s ", a->any.name);
      tokens = cl_max_cpos(a);
      types = cl_max_id(a);
      if ((tokens > 0) && (types > 0))
        Rprintf("%10d tokens, %8d types", tokens, types);
      else 
        Rprintf("           NO DATA");
      break;
    case ATT_STRUC:
      Rprintf("s-ATT %-16s ", a->any.name); 
      regions = cl_max_struc(a);
      if (regions >= 0) {
        Rprintf("%10d regions", regions);
        if (cl_struc_values(a))
          Rprintf(" (with annotations)");
      }
      else 
        Rprintf("           NO DATA");
      break;
    case ATT_ALIGN:
      Rprintf("a-ATT %-16s ", a->any.name); 
      blocks = cl_max_alg(a);
      if (blocks >= 0) {
        Rprintf("%10d alignment blocks", blocks);
        if (cl_has_extended_alignment(a))
          Rprintf(" (extended)");
      }
      else
        Rprintf("           NO DATA");
      break;
    default:
      Rprintf("???   %-16s (unknown attribute type)", a->any.name); 
      break;
    }
    Rprintf("\n");
  }

  Rprintf("\n");
}
Пример #5
0
void
do_cqi_corpus_structural_attribute_has_values(void) {
  char *a;
  Attribute *attribute;
          
  a = cqi_read_string();        /* need to try all possible attribute types */
  if (server_debug)
   Rprintf( "CQi: CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES('%s')\n", a);
  attribute = cqi_lookup_attribute(a, ATT_STRUC);
  if (attribute != NULL) {
    cqi_data_bool(cl_struc_values(attribute));
  }
  else {
    cqi_command(cqi_errno);
  }
  free(a);
}
Пример #6
0
/**
 * Main function for cwb-decode.
 *
 * @param argc   Number of command-line arguments.
 * @param argv   Command-line arguments.
 */
int
main(int argc, char **argv)
{
  Attribute *attr;
  Attribute *context = NULL;

  int sp;  /* start position of a match */
  int ep;  /* end position of a match */

  int w, cnt, read_pos_frm_stdin;

  char s[CL_MAX_LINE_LENGTH];      /* buffer for strings read from file */
  char *token;

  char *input_filename = NULL;
  FILE *input_file = stdin;

  /* ------------------------------------------------- PARSE ARGUMENTS */

  int c;
  extern char *optarg;
  extern int optind;

  progname = argv[0];

  first_token = -1;
  last = -1;
  maxlast = -1;

  read_pos_frm_stdin = 0;

  /* use getopt() to parse command-line options */
  while((c = getopt(argc, argv, "+s:e:r:nLHCxXf:ph")) != EOF)
    switch(c) {

      /* s: start corpus position */
    case 's':
      first_token = atoi(optarg);
      break;

      /* e: end corpus position */
    case 'e':
      last = atoi(optarg);
      break;

      /* r: registry directory */
    case 'r':
      if (registry_directory == NULL)
        registry_directory = optarg;
      else {
        fprintf(stderr, "%s: -r option used twice\n", progname);
        exit(2);
      }
      break;

      /* n: show cpos in -H mode */
    case 'n':
      printnum++;
      break;

      /* x: XML-compatible output in -C mode (-Cx) */
    case 'x':
      xml_compatible++;
      break;

      /* L,H,C,X: Lisp, Horizontal, Compact, and XML modes */
    case 'L':
      mode = LispMode;
      break;
    case 'H':
      mode = ConclineMode;
      break;
    case 'C':
      mode = EncodeMode;
      break;
    case 'X':
      mode = XMLMode;
      break;

      /* f: matchlist mode / read corpus positions from file */
    case 'f':
      input_filename = optarg;
      break;

      /* p: matchlist mode / read corpus positions from stdin */
    case 'p':
      read_pos_frm_stdin++;
      break;

      /* h: help page */
    case 'h':
      decode_usage(2);
      break;

    default:
      fprintf(stderr, "Illegal option. Try \"%s -h\" for more information.\n", progname);
      fprintf(stderr, "[remember that options go before the corpus name, and attribute declarations after it!]\n");
      decode_cleanup(2);
    }

  /* required argument: corpus id */
  if (optind < argc) {
    corpus_id = argv[optind++];

    if ((corpus = cl_new_corpus(registry_directory, corpus_id)) == NULL) {
      fprintf(stderr, "Corpus %s not found in registry %s . Aborted.\n",
              corpus_id,
              (registry_directory ? registry_directory : cl_standard_registry() ) );
      decode_cleanup(1);
    }
  }
  else {
    fprintf(stderr, "Missing argument. Try \"%s -h\" for more information.\n", progname);
    decode_cleanup(2);
  }


  /* now parse output flags (-P, -S, ...) [cnt is our own argument counter] */
  for (cnt = optind; cnt < argc; cnt++) {
    if (strcmp(argv[cnt], "-c") == 0) {         /* -c: context */

      if ((context = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) {
        fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n",
                corpus_id, argv[cnt]);
        decode_cleanup(1);
      }

    }
    else if (strcmp(argv[cnt], "-P") == 0) {    /* -P: positional attribute */

      if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_POS)) == NULL) {
        fprintf(stderr, "Can't open p-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]);
        decode_cleanup(1);
      }
      else {
        if (cl_max_cpos(attr) > 0) {
          decode_add_attribute(attr);
          if (maxlast < 0)
            maxlast = cl_max_cpos(attr); /* determines corpus size */
        }
        else {
          fprintf(stderr, "Attribute %s.%s is declared, but not accessible (missing data?). Aborted.\n",
                  corpus_id, argv[cnt]);
          decode_cleanup(1);
        }
      }

    }
    else if (strcmp(argv[cnt], "-ALL") == 0) {  /* -ALL: all p-attributes and s-attributes */

      for (attr = corpus->attributes; attr; attr = attr->any.next)
        if (attr->any.type == ATT_POS) {
          decode_add_attribute(attr);
          if (maxlast < 0)
            maxlast = cl_max_cpos(attr);
        }
        else if (attr->any.type == ATT_STRUC) {
          decode_add_attribute(attr);
        }

    }
    else if (strcmp(argv[cnt], "-D") == 0) {    /* -D: dynamic attribute (not implemented) */

      fprintf(stderr, "Sorry, dynamic attributes are not implemented. Aborting.\n");
      decode_cleanup(2);

    }
    else if (strcmp(argv[cnt], "-A") == 0) {    /* -A: alignment attribute */

      if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_ALIGN)) == NULL) {
        fprintf(stderr, "Can't open a-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]);
        decode_cleanup(1);
      }
      else
        decode_add_attribute(attr);
    }
    else if (strcmp(argv[cnt], "-S") == 0) {    /* -S: structural attribute (as tags) */

      if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) {
        fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n",
                corpus_id, argv[cnt]);
        decode_cleanup(1);
      }
      else
        decode_add_attribute(attr);
    }
    else if (strcmp(argv[cnt], "-V") == 0) {    /* -V: show structural attribute values (with -p or -f) */

      if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) {
        fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n",
                corpus_id, argv[cnt]);
        decode_cleanup(1);
      }
      else if (!cl_struc_values(attr)) {
        fprintf(stderr, "S-attribute %s.%s does not have annotations. Aborted.\n",
                corpus_id, argv[cnt]);
        decode_cleanup(1);
      }
      else if (printValuesIndex >= MAX_PRINT_VALUES) {
        fprintf(stderr, "Too many -V attributes, sorry. Aborted.\n");
        decode_cleanup(1);
      }
      else
        printValues[printValuesIndex++] = attr;
    }
    else {

      fprintf(stderr, "Unknown flag: %s\n", argv[cnt]);
      decode_cleanup(2);

    }
  }
  /* ---- end of parse attribute declarations ---- */

  if (input_filename != NULL) {
    if (strcmp(input_filename, "-") == 0)
      input_file = stdin;
    else if ((input_file = fopen(input_filename, "r")) == NULL) {
      perror(input_filename);
      exit(1);
    }
    read_pos_frm_stdin++;
  }

  decode_verify_print_value_list();

  /* ------------------------------------------------------------ DECODE CORPUS */

  if (read_pos_frm_stdin == 0) {
    /*
     * normal mode: decode entire corpus or specified range
     */

    if (maxlast < 0) {
      fprintf(stderr, "Need at least one p-attribute (-P flag). Aborted.\n");
      decode_cleanup(2);
    }

    if (first_token < 0 || first_token >= maxlast)
      first_token = 0;

    if (last < 0 || last >= maxlast)
      last = maxlast - 1;

    if (last < first_token) {
      fprintf(stderr, "Warning: output range #%d..#%d is empty. No output.\n", first_token, last);
      decode_cleanup(2);
    }

    if ( (mode == XMLMode) ||  ((mode == EncodeMode) && xml_compatible) ) {
      decode_print_xml_declaration();
      printf("<corpus name=\"%s\" start=\"%d\" end=\"%d\">\n",
             corpus_id, first_token, last);
    }

    /* decode_print_surrounding_s_att_values(first_token); */ /* don't do that in "normal" mode, coz it doesn't make sense */

    for (w = first_token; w <= last; w++)
      decode_print_token_sequence(w, -1, context);

    if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) {
      printf("</corpus>\n");
    }
  }
  else {
    /*
     * matchlist mode: read (pairs of) corpus positions from stdin or file
     */

    if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) {
      decode_print_xml_declaration();
      printf("<matchlist corpus=\"%s\">\n", corpus_id);
    }

    cnt = 0;
    while (fgets(s, CL_MAX_LINE_LENGTH, input_file) != NULL) {

      token = strtok(s, " \t\n");

      if ((token != NULL) && is_num(token)) {
        sp = atoi(token);

        ep = -1;
        if ((token = strtok(NULL, " \t\n")) != NULL) {
          if (!is_num(token)) {
            fprintf(stderr, "Invalid corpus position #%s . Aborted.\n", token);
            decode_cleanup(1);
          }
          else
            ep = atoi(token);
        }

        cnt++;                  /* count matches in matchlist  */
        if (mode == XMLMode) {
          printf("<match nr=\"%d\"", cnt);
          if (printnum)
            printf(" start=\"%d\" end=\"%d\"", sp, (ep >= 0) ? ep : sp);
          printf(">\n");
        }
        else {
          /* nothing shown before range */
        }

        decode_print_surrounding_s_att_values(sp);

        decode_print_token_sequence(sp, ep, context);

        if (mode == XMLMode) {
          printf("</match>\n");
        }
        else if (mode != ConclineMode) {
          printf("\n");         /* blank line, unless in -H mode */
        }
      }
      else {
        fprintf(stderr, "Invalid corpus position #%s . Aborted.\n", s);
        decode_cleanup(1);
      }
    }

    if (input_file != stdin)
      fclose(input_file);

    if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) {
      printf("</matchlist>\n");
    }
  }

  decode_cleanup(0);
  return 0;                     /* just to keep gcc from complaining */
}
Пример #7
0
/**
 * Prints out the requested attributes for a sequence of tokens
 * (or a single token if end_position == -1).
 *
 * If the -c flag was used (and, thus, the context parameter is not NULL),
 * then the sequence is extended to the entire s-attribute region (in matchlist mode).
 */
void
decode_print_token_sequence(int start_position, int end_position, Attribute *context)
{
  int alg, aligned_start, aligned_end, aligned_start2, aligned_end2,
    rng_start, rng_end, snum;
  int start_context, end_context, dummy;
  int lastposa, i, w;

  /* pointer used for values of p-attributes */
  char *wrd;


  start_context = start_position;
  end_context = (end_position >= 0) ? end_position : start_position;
  /* above ensures that in non-matchlist mode (where ep == -1), we only print one token */

  if (context != NULL) {

    /* expand the start_context end_context numbers to the start
     * and end points of the containing region of the context s-attribute */
    if (!cl_cpos2struc2cpos(context, start_position,
                            &start_context, &end_context)) {
      start_context = start_position;
      end_context = (end_position >= 0) ? end_position : start_position;
    }
    else if (end_position >= 0) {
      if (!cl_cpos2struc2cpos(context, end_position,
                               &dummy, &end_context)) {
        end_context = (end_position >= 0) ? end_position : start_position;
      }
    }

    /* indicate that we're showing context */
    switch (mode) {
    case LispMode:
      printf("(TARGET %d\n", start_position);
      if (end_position >= 0)
        printf("(INTERVAL %d %d)\n", start_position, end_position);
      break;
    case EncodeMode:
    case ConclineMode:
      /* nothing here */
      break;
    case XMLMode:
      printf("<context start=\"%d\" end=\"%d\"/>\n", start_context, end_context);
      break;
    case StandardMode:
    default:
      if (end_position >= 0) {
        printf("INTERVAL %d %d\n", start_position, end_position);
      }
      else {
        printf("TARGET %d\n", start_position);
      }
      break;
    }

  } /* endif context != NULL */

  /* some extra information in -L and -H modes */
  if (mode == LispMode && end_position != -1)
    printf("(CONTEXT %d %d)\n", start_context, end_context);
  else if (mode == ConclineMode) {
    if (printnum)
      printf("%8d: ", start_position);
  }

  /* now print the token sequence (including context) with all requested attributes */
  for (w = start_context; w <= end_context; w++) {
    int beg_of_line;

    /* extract s-attribute regions for start and end tags into s_att_regions[] */
    N_sar = 0;                  /* counter and index */
    for (i = 0; i < print_list_index; i++) {
      if (print_list[i]->any.type == ATT_STRUC) {
        if ( ((snum = cl_cpos2struc(print_list[i], w)) >= 0) &&
             (cl_struc2cpos(print_list[i], snum, &rng_start, &rng_end)) &&
             ((w == rng_start) || (w == rng_end)) ) {
          s_att_regions[N_sar].name = print_list[i]->any.name;
          s_att_regions[N_sar].start = rng_start;
          s_att_regions[N_sar].end = rng_end;
          if (cl_struc_values(print_list[i]))
            s_att_regions[N_sar].annot = cl_struc2str(print_list[i], snum);
          else
            s_att_regions[N_sar].annot = NULL;
          N_sar++;
        }
      }
    }
    decode_sort_s_att_regions();       /* sort regions to ensure proper nesting of start and end tags */

    /* show corpus positions with -n option */
    if (printnum)
      switch (mode) {
      case LispMode:
        printf("(%d ", w);
        break;
      case EncodeMode:
        printf("%8d\t", w);
        break;
      case ConclineMode:
        /* nothing here (shown at start of line in -H mode) */
        break;
      case XMLMode:
        /* nothing here */
        break;
      case StandardMode:
      default:
        printf("%8d: ", w);
        break;
      }
    else {
      if (mode == LispMode)
        printf("(");            /* entire match is parenthesised list in -L mode */
    }

    lastposa = -1;

    /* print start tags (s- and a-attributes) with -C,-H,-X */
    if ((mode == EncodeMode) || (mode == ConclineMode) || (mode == XMLMode)) {

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              && (w == aligned_start)
              ) {
            if (mode == XMLMode) {
              printf("<align type=\"start\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("<%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;

        default:                /* ignore all other attribute types */
          break;
        }
      }

      /* print s-attributes from s_att_regions[] (using sar_sort_index[]) */
      for (i = 0; i < N_sar; i++) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->start == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"start\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            if (region->annot)
              printf(" value=\"%s\"", decode_string_escape(region->annot));
            printf("/>\n");
          }
          else {
            printf("<%s%s%s>%c",
                   region->name,
                   region->annot ? " " : "",
                   region->annot ? region->annot : "",
                   (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

    }

    /* now print token with its attribute values (p-attributes only for -C,-H,-X) */
    if (mode == XMLMode) {
      printf("<token");
      if (printnum)
        printf(" cpos=\"%d\"", w);
      printf(">");
    }

    beg_of_line = 1;
    /* Loop printing each attribute for this cpos (w) */
    for (i = 0; i < print_list_index; i++) {

      switch (print_list[i]->any.type) {
      case ATT_POS:
        lastposa = i;
        if ((wrd = decode_string_escape(cl_cpos2str(print_list[i], w))) != NULL) {
          switch (mode) {
          case LispMode:
            printf("(%s \"%s\")", print_list[i]->any.name, wrd);
            break;

          case EncodeMode:
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("\t%s", wrd);
            break;

          case ConclineMode:
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("/%s", wrd);
            break;

          case XMLMode:
            printf(" <attr name=\"%s\">%s</attr>", print_list[i]->any.name, wrd);
            break;

          case StandardMode:
          default:
            printf("%s=%s\t", print_list[i]->any.name, wrd);
            break;
          }
        }
        else {
          cl_error("(aborting) cl_cpos2str() failed");
          decode_cleanup(1);
        }
        break;

      case ATT_ALIGN:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              ) {
            if (mode == LispMode) {
              printf("(ALG %d %d %d %d)",
                     aligned_start, aligned_end, aligned_start2, aligned_end2);
            }
            else {
              printf("%d-%d==>%s:%d-%d\t",
                     aligned_start, aligned_end, print_list[i]->any.name, aligned_start2, aligned_end2);
            }
          }
          else if (cl_errno != CDA_OK) {
            cl_error("(aborting) alignment error");
            decode_cleanup(1);
          }
        }
        break;

      case ATT_STRUC:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (cl_cpos2struc2cpos(print_list[i], w, &rng_start, &rng_end)) {
            /* standard and -L mode don't show tag annotations */
            printf(mode == LispMode ? "(STRUC %s %d %d)" : "<%s>:%d-%d\t",
                   print_list[i]->any.name,
                   rng_start, rng_end);
          }
          else if (cl_errno != CDA_OK)
            cl_error("(aborting) cl_cpos2struc2cpos() failed");
        }
        break;

      case ATT_DYN:
        /* dynamic attributes aren't implemented */
      default:
        break;
      }
    }

    /* print token separator (or end of token in XML mode) */
    switch (mode) {
    case LispMode:
      printf(")\n");
      break;
    case ConclineMode:
      printf(" ");
      break;
    case XMLMode:
      printf(" </token>\n");
      break;
    case EncodeMode:
    case StandardMode:
    default:
      printf("\n");
      break;
    }

    /* now, after printing all the positional attributes, print end tags with -H,-C,-X */
    if (mode == EncodeMode  || mode == ConclineMode || mode == XMLMode) {

      /* print s-attributes from s_att_regions[] (using sar_sort_index[] in reverse order) */
      for (i = N_sar - 1; i >= 0; i--) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->end == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"end\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            printf("/>\n");
          }
          else {
            printf("</%s>%c", region->name, (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              && (w == aligned_end)
              ) {
            if (mode == XMLMode) {
              printf("<align type=\"end\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("</%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;

        default:
          /* ignore all other attribute types */
          break;
        }
      }

    } /* end of print end tags */

  }  /* end of match range loop: for w from start_context to end_context */

  /* end of match (for matchlist mode in particular) */
  if ((context != NULL) && (mode == LispMode))
    printf(")\n");
  else if (mode == ConclineMode)
    printf("\n");

  return;
}
Пример #8
0
/**
 * Main function for cwb-s-encode.
 *
 * @param argc   Number of command-line arguments.
 * @param argv   Command-line arguments.
 */
int
main(int argc, char **argv)
{
  int input_line;
  int start, end;
  char *annot;
  char buf[CL_MAX_LINE_LENGTH];
  Attribute *att;
  int V_switch, values, S_annotations_dropped;
  int i, N;

  progname = argv[0];
  sencode_parse_options(argc, argv);

  /* -a mode: read existing regions into memory */
  if (add_to_existing) {
    if (corpus == NULL) {
      Rprintf( "Error: You have to specify source corpus (-C <corpus>) for -a switch.\n");
      rcqp_receive_error(1);
    }
    att = cl_new_attribute(corpus, new_satt.name, ATT_STRUC);
    if ((att != NULL) && (cl_max_struc(att) > 0)) {
      V_switch = new_satt.store_values;
      values = cl_struc_values(att);
      if (V_switch && (!values)) {
        Rprintf( "Error: Existing regions of -V attribute have no annotations.\n");
        rcqp_receive_error(1);
      }
      else if ((!V_switch) && values) {
        Rprintf( "Error: Existing regions of -S attributes have annotations.\n");
        rcqp_receive_error(1);
      }
      if (!silent)
        Rprintf("[Loading previous <%s> regions]\n", new_satt.name);

      N = cl_max_struc(att);
      for (i = 0; i < N; i++) {
        cl_struc2cpos(att, i, &start, &end);
        annot = cl_struc2str(att, i);
        SL_insert(start, end, annot);
      }
    }
    else {
      if (!silent)
        Rprintf("[No <%s> regions defined (skipped)]\n", new_satt.name);
    }
  }

  /* loop reading input (stdin or -f <file>) */
  if (in_memory && (!silent))
    Rprintf("[Reading input data]\n");
  input_line = 0;
  S_annotations_dropped = 0;
  while (fgets(buf, CL_MAX_LINE_LENGTH, text_fd)) {
    input_line++;

    /* check for buffer overflow */
    if (strlen(buf) >= (CL_MAX_LINE_LENGTH - 1)) {
      Rprintf( "BUFFER OVERFLOW, input line #%d is too long:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }

    if (! sencode_parse_line(buf, &start, &end, &annot)) {
      Rprintf( "FORMAT ERROR on line #%d:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }
    if (new_satt.store_values && (annot == NULL)) {
      Rprintf( "MISSING ANNOTATION on line #%d:\n>> %s", input_line, buf);
      rcqp_receive_error(1);
    }
    if ((!new_satt.store_values) && (annot != NULL)) {
      if (! S_annotations_dropped)
        Rprintf( "WARNING: Annotation for -S attribute ignored on line #%d (warning issued only once):\n>> %s", input_line, buf);
      S_annotations_dropped++;
    }
    if ((start <= new_satt.last_cpos) || (end < start)) {
      Rprintf( "RANGE INCONSISTENCY on line #%d:\n>> %s(end of previous region was %d)\n", input_line, buf, new_satt.last_cpos);
      rcqp_receive_error(1);
    }
    if (annot != NULL && set_att != set_none) {
      /* convert set annotation into standard syntax */
      annot = sencode_check_set(annot);
      if (annot == NULL) {
        Rprintf( "SET ANNOTATION SYNTAX ERROR on line #%d:\n>> %s", input_line, buf);
        rcqp_receive_error(1);
      }
    }

    /* debugging output */
    if (debug) {
      Rprintf( "[%d, %d]", start, end);
      if (annot != NULL)
        Rprintf( " <%s>", annot);
      Rprintf( "\n");
    }

    /* in -M mode, store this region in memory; otherwise write it to the disk files */
    if (in_memory)
      SL_insert(start, end, annot);
    else
      sencode_write_region(start, end, annot);

    cl_free(annot);
  }

  /* in -M mode, write data to disk now that we have finished looping across input data */
  if (in_memory) {
    SL item;

    if (!silent)
      Rprintf("[Creating encoded disk file(s)]\n");
    SL_rewind();
    while ((item = SL_next()) != NULL)
      sencode_write_region(item->start, item->end, item->annot);
  }

  /* close files */
  sencode_close_files();

  if (S_annotations_dropped > 0)
    Rprintf( "Warning: %d annotation values dropped for -S attribute '%s'.\n", S_annotations_dropped, new_satt.name);

  rcqp_receive_error(0);
}
Пример #9
0
Файл: groups.c Проект: cran/rcqp
Group *compute_grouping(CorpusList *cl,
                        FieldType source_field,
                        int source_offset,
                        char *source_attr_name,
                        FieldType target_field,
                        int target_offset,
                        char *target_attr_name,
                        int cutoff_freq)
{
  Group *group;
  Attribute *source_attr, *target_attr;
  int source_is_struc = 0, target_is_struc = 0;
  char *source_base = NULL, *target_base = 0;

  if ((cl == NULL) || (cl->corpus == NULL)) {
    cqpmessage(Warning, "Grouping:\nCan't access corpus.");
    return NULL;
  }

  if ((cl->size == 0) || (cl->range == NULL)) {
    cqpmessage(Warning, "Corpus %s is empty, no grouping possible",
               cl->name);
    return NULL;
  }

  if ((source_attr_name == NULL) && (source_field == NoField)) {
    source_attr = NULL;
  }
  else {
    source_attr = find_attribute(cl->corpus, source_attr_name, ATT_POS, NULL);
    if (source_attr == NULL) {
      source_attr = find_attribute(cl->corpus, source_attr_name, ATT_STRUC, NULL);
      source_is_struc = 1;
    }
    if (source_attr == NULL) {
      cqpmessage(Error, "Can't find attribute ``%s'' for named query %s",
                 source_attr_name, cl->name);
      return NULL;
    }
    if (source_is_struc) {
      if (cl_struc_values(source_attr)) {
        source_base = cl_struc2str(source_attr, 0); /* should be beginning of the attribute's lexicon */
        assert(source_base && "Internal error. Please don't use s-attributes in group command.");
      }
      else {
        cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s",
                   source_attr_name, cl->name);
        return NULL;
      }
    }

    switch (source_field) {
    case KeywordField:
      if (cl->keywords == NULL) {
        cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
        return NULL;
      }
      break;
      
    case TargetField:
      if (cl->targets == NULL) {
        cqpmessage(Error, "No target anchors defined for %s", cl->name);
        return NULL;
      }
      break;
      
    case MatchField:
    case MatchEndField:
      assert(cl->range && cl->size > 0);
      break;
      
    case NoField:
    default:
      cqpmessage(Error, "Illegal second anchor in group command");
      return NULL;
      break;
    }
  }

  target_attr = find_attribute(cl->corpus, target_attr_name, ATT_POS, NULL);
  if (target_attr == NULL) {
      target_attr = find_attribute(cl->corpus, target_attr_name, ATT_STRUC, NULL);
      target_is_struc = 1;
  }
  if (target_attr == NULL) {
    cqpmessage(Error, "Can't find attribute ``%s'' for named query %s",
               target_attr_name, cl->name);
    return NULL;
  }
  if (target_is_struc) {
    if (cl_struc_values(target_attr)) {
      target_base = cl_struc2str(target_attr, 0); /* should be beginning of the attribute's lexicon */
      assert(target_base && "Internal error. Please don't use s-attributes in group command.");
    }
    else {
      cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s",
                 target_attr_name, cl->name);
      return NULL;
    }
  }

  switch (target_field) {
  case KeywordField:
    if (cl->keywords == NULL) {
      cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
      return NULL;
    }
    break;
    
  case TargetField:
    if (cl->targets == NULL) {
      cqpmessage(Error, "No target anchors defined for %s", cl->name);
      return NULL;
    }
    break;
    
  case MatchField:
  case MatchEndField:
    assert(cl->range && cl->size > 0);
    break;
    
  case NoField:
  default:
    cqpmessage(Error, "Illegal anchor in group command");
    return NULL;
    break;
  }

  /* set up Group object */
  group = (Group *) cl_malloc(sizeof(Group));
  group->my_corpus = cl;
  group->source_attribute = source_attr;
  group->source_offset = source_offset;
  group->source_is_struc = source_is_struc;
  group->source_base = source_base;
  group->source_field = source_field;
  group->target_attribute = target_attr;
  group->target_offset = target_offset;
  group->target_is_struc = target_is_struc;
  group->target_base = target_base;
  group->target_field = target_field;
  group->nr_cells = 0;
  group->count_cells = NULL;
  group->cutoff_frequency = cutoff_freq;

  if (UseExternalGrouping && !insecure && !(source_is_struc || target_is_struc))
    return ComputeGroupExternally(group); /* modifies Group object in place and returns pointer or NULL */
  else
    return ComputeGroupInternally(group);
}
Пример #10
0
/* tabulate specified query result, using settings from global list of tabulation items;
   return value indicates whether tabulation was successful (otherwise, generates error message) */
int
print_tabulation(CorpusList *cl, int first, int last, struct Redir *rd)
{
  TabulationItem item = TabulationList;
  int current;
  
  if (! cl) 
    return 0;

  if (first <= 0) first = 0;    /* make sure that first and last match to tabulate are in range */
  if (last >= cl->size) last = cl->size - 1;

  while (item) {                /* obtain attribute handles for tabulation items */
    if (item->attribute_name) {
      if (NULL != (item->attribute = cl_new_attribute(cl->corpus, item->attribute_name, ATT_POS))) {
        item->attribute_type = ATT_POS;
      }
      else if (NULL != (item->attribute = cl_new_attribute(cl->corpus, item->attribute_name, ATT_STRUC))) {
        item->attribute_type = ATT_STRUC;
        if (! cl_struc_values(item->attribute)) {
          cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s", item->attribute_name, cl->name);
          return 0;
        }
      }
      else {
        cqpmessage(Error, "Can't find attribute ``%s'' for named query %s", item->attribute_name, cl->name);
        return 0;
      }
    }
    else {
      item->attribute_type = ATT_NONE; /* no attribute -> print corpus position */
    }
    if (cl->size > 0) {
      /* work around bug: anchor validation will fail for empty query result (but then loop below is void anyway) */
      if (! (pt_validate_anchor(cl, item->anchor1) && pt_validate_anchor(cl, item->anchor2)))
	return 0;
    }
    item = item->next;
  }

  if (! open_stream(rd, cl->corpus->charset)) {
    cqpmessage(Error, "Can't redirect output to file or pipe\n");
    return 0;
  }

  /* tabulate selected attribute values for matches <first> .. <last> */
  for (current = first; current <= last; current++) {
    TabulationItem item = TabulationList;
    while (item) {
      int start = pt_get_anchor_cpos(cl, current, item->anchor1, item->offset1);
      int end   = pt_get_anchor_cpos(cl, current, item->anchor2, item->offset2);
      int cpos;

      if (start < 0 || end < 0) /* one of the anchors is undefined -> print single undefined value for entire range */
        start = end = -1;

      for (cpos = start; cpos <= end; cpos++) {
        if (item->attribute_type == ATT_NONE) {
          fprintf(rd->stream, "%d", cpos);
        }
        else {
          if (cpos >= 0) {      /* undefined anchors print empty string */
            char *string = NULL;
            if (item->attribute_type == ATT_POS) 
              string = cl_cpos2str(item->attribute, cpos);
            else
              string = cl_cpos2struc2str(item->attribute, cpos);
            if (string) {
              if (item->flags) {
                char *copy = cl_strdup(string);
                cl_string_canonical(copy, cl->corpus->charset, item->flags);
                fprintf(rd->stream, "%s", copy);
                cl_free(copy);
              }
              else {
                fprintf(rd->stream, "%s", string);
              }
            }
          }
        }
        if (cpos < end)         /* multiple values for tabulation item are separated by blanks */
          fprintf(rd->stream, " "); 
      }
      if (item->next)           /* multiple tabulation items are separated by TABs */
        fprintf(rd->stream, "\t");
      item = item->next;
    }
    fprintf(rd->stream, "\n");
  }
  
  close_stream(rd);
  free_tabulation_list();
  return 1;
}