Пример #1
0
/**
 * Checks whether an anchor may be used with a named query in the
 * tabulate command.
 *
 * Keyword and target anchors are accepted only if the query result carries
 * the corresponding anchor array; match and matchend are asserted to be
 * present. Every other value (including NoField) is rejected.
 *
 * @param cl      The named query whose anchors are inspected.
 * @param anchor  The anchor to validate.
 * @return        1 if the anchor is usable, 0 otherwise (an error has been
 *                reported through cqpmessage() in that case).
 */
int
pt_validate_anchor(CorpusList *cl, FieldType anchor)
{
  if (anchor == KeywordField) {
    if (cl->keywords != NULL)
      return 1;
    cqpmessage(Error, "No keyword anchors defined for named query %s", cl->name);
    return 0;
  }

  if (anchor == TargetField) {
    if (cl->targets != NULL)
      return 1;
    cqpmessage(Error, "No target anchors defined for named query %s", cl->name);
    return 0;
  }

  if (anchor == MatchField || anchor == MatchEndField) {
    /* match ranges should always be present */
    assert(cl->range != NULL);
    return 1;
  }

  /* NoField and any unknown anchor code */
  cqpmessage(Error, "Illegal anchor in tabulate command");
  return 0;
}
Пример #2
0
Файл: cqp.c Проект: rforge/rcwb
/**
 * Runs the CQP parser over the commands contained in a string.
 *
 * The string is installed as the global parser input and yyparse() is
 * called repeatedly until the input position reaches the end of the string,
 * a parse fails, or exit_cqp becomes set. When running as CQPserver a second
 * command on the same line is refused with an error.
 *
 * The global input string and position are reset before returning.
 *
 * @param s  The string to parse.
 * @return   Boolean: true = all ok, false = a problem.
 */
int
cqp_parse_string(char *s)
{
  int input_length = strlen(s);
  int success = 1;
  int command_seen = 0;   /* set after the first command when in CQPserver mode */

  cqp_input_string_position = 0;
  cqp_input_string = s;

  for (;;) {
    if (!success || cqp_input_string_position >= input_length || exit_cqp)
      break;

    if (command_seen) {
      /* trying to parse a second command -> abort with error */
      cqpmessage(Error, "Multiple commands on a single line not allowed in CQPserver mode.");
      success = 0;
      break;
    }

    if (yyparse() != 0)
      success = 0;

    /* only one command per line in CQPserver (security reasons) */
    if (which_app == cqpserver)
      command_seen = 1;
  }

  cqp_input_string_position = 0;
  cqp_input_string = NULL;

  return success;
}
Пример #3
0
/* print_output():
 * Prints CL, without a header, to the stream fd by dispatching to the
 * print function that belongs to the requested print mode. An unknown
 * mode only produces an error message.
 * Range checking of first/last is done by the mode-specific print function.
 */
void 
print_output(CorpusList *cl, 
             FILE *fd,
             int interactive,
             ContextDescriptor *cd,
             int first, int last,
             PrintMode mode)
{
  if (mode == PrintSGML)
    sgml_print_output(cl, fd, interactive, cd, first, last);
  else if (mode == PrintHTML)
    html_print_output(cl, fd, interactive, cd, first, last);
  else if (mode == PrintLATEX)
    latex_print_output(cl, fd, interactive, cd, first, last);
  else if (mode == PrintASCII)
    ascii_print_output(cl, fd, interactive, cd, first, last);
  else
    cqpmessage(Error, "Unknown print mode");
}
Пример #4
0
Файл: groups.c Проект: cran/rcqp
/**
 * Prints a grouping result to the redirection target, using the print
 * function that matches GlobalPrintMode.
 *
 * Nothing happens if group is NULL or if the output stream cannot be opened.
 * The stream is closed again before returning.
 *
 * @param group   The group to print (may be NULL).
 * @param expand  Passed through unchanged to the mode-specific print function.
 * @param rd      Output redirection; opened with the charset of the group's corpus.
 */
void print_group(Group *group, int expand, struct Redir *rd)
{
  if (!group || !open_stream(rd, group->my_corpus->corpus->charset))
    return;

  switch (GlobalPrintMode) {
  case PrintSGML:  sgml_print_group(group, expand, rd->stream);  break;
  case PrintHTML:  html_print_group(group, expand, rd->stream);  break;
  case PrintLATEX: latex_print_group(group, expand, rd->stream); break;
  case PrintASCII: ascii_print_group(group, expand, rd->stream); break;
  default:
    cqpmessage(Error, "Unknown print mode");
    break;
  }

  close_stream(rd);
}
Пример #5
0
Файл: cqp.c Проект: rforge/rcwb
/**
 * Registers sigINT_signal_handler as the handler for SIGINT (Ctrl-C).
 *
 * The signal_handler_is_installed flag is raised before the attempt, so
 * that it is set even when registration fails and CQP does not keep trying.
 * On failure a warning is issued and SIGINT is set to be ignored.
 *
 * @see sigINT_signal_handler
 */
void
install_signal_handler(void)
{
  signal_handler_is_installed = 1;

  if (signal(SIGINT, sigINT_signal_handler) != SIG_ERR)
    return;

  cqpmessage(Warning, "Can't install interrupt handler.\n");
  signal(SIGINT, SIG_IGN);
}
Пример #6
0
/**
 * Converts the name of a search strategy into its SearchStrategy code.
 *
 * The comparison ignores case. A NULL string yields SearchNone silently;
 * an unrecognised name yields SearchNone after a warning has been issued.
 *
 * @param s  Strategy name ("leftmost", "rightmost", "nearest", "farthest") or NULL.
 * @return   The corresponding strategy code, or SearchNone.
 */
SearchStrategy string_to_strategy(char *s)
{
  const struct {
    const char *name;
    SearchStrategy strategy;
  } known[] = {
    { "leftmost",  SearchLeftmost  },
    { "rightmost", SearchRightmost },
    { "nearest",   SearchNearest   },
    { "farthest",  SearchFarthest  }
  };
  size_t k;

  if (s == NULL)
    return SearchNone;

  for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
    if (strcasecmp(s, known[k].name) == 0)
      return known[k].strategy;
  }

  cqpmessage(Warning, "Illegal search strategy specification ``%s''", s);
  return SearchNone;
}
Пример #7
0
Файл: groups.c Проект: cran/rcqp
Group *compute_grouping(CorpusList *cl,
                        FieldType source_field,
                        int source_offset,
                        char *source_attr_name,
                        FieldType target_field,
                        int target_offset,
                        char *target_attr_name,
                        int cutoff_freq)
{
  Group *group;
  Attribute *source_attr, *target_attr;
  int source_is_struc = 0, target_is_struc = 0;
  char *source_base = NULL, *target_base = 0;

  if ((cl == NULL) || (cl->corpus == NULL)) {
    cqpmessage(Warning, "Grouping:\nCan't access corpus.");
    return NULL;
  }

  if ((cl->size == 0) || (cl->range == NULL)) {
    cqpmessage(Warning, "Corpus %s is empty, no grouping possible",
               cl->name);
    return NULL;
  }

  if ((source_attr_name == NULL) && (source_field == NoField)) {
    source_attr = NULL;
  }
  else {
    source_attr = find_attribute(cl->corpus, source_attr_name, ATT_POS, NULL);
    if (source_attr == NULL) {
      source_attr = find_attribute(cl->corpus, source_attr_name, ATT_STRUC, NULL);
      source_is_struc = 1;
    }
    if (source_attr == NULL) {
      cqpmessage(Error, "Can't find attribute ``%s'' for named query %s",
                 source_attr_name, cl->name);
      return NULL;
    }
    if (source_is_struc) {
      if (cl_struc_values(source_attr)) {
        source_base = cl_struc2str(source_attr, 0); /* should be beginning of the attribute's lexicon */
        assert(source_base && "Internal error. Please don't use s-attributes in group command.");
      }
      else {
        cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s",
                   source_attr_name, cl->name);
        return NULL;
      }
    }

    switch (source_field) {
    case KeywordField:
      if (cl->keywords == NULL) {
        cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
        return NULL;
      }
      break;
      
    case TargetField:
      if (cl->targets == NULL) {
        cqpmessage(Error, "No target anchors defined for %s", cl->name);
        return NULL;
      }
      break;
      
    case MatchField:
    case MatchEndField:
      assert(cl->range && cl->size > 0);
      break;
      
    case NoField:
    default:
      cqpmessage(Error, "Illegal second anchor in group command");
      return NULL;
      break;
    }
  }

  target_attr = find_attribute(cl->corpus, target_attr_name, ATT_POS, NULL);
  if (target_attr == NULL) {
      target_attr = find_attribute(cl->corpus, target_attr_name, ATT_STRUC, NULL);
      target_is_struc = 1;
  }
  if (target_attr == NULL) {
    cqpmessage(Error, "Can't find attribute ``%s'' for named query %s",
               target_attr_name, cl->name);
    return NULL;
  }
  if (target_is_struc) {
    if (cl_struc_values(target_attr)) {
      target_base = cl_struc2str(target_attr, 0); /* should be beginning of the attribute's lexicon */
      assert(target_base && "Internal error. Please don't use s-attributes in group command.");
    }
    else {
      cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s",
                 target_attr_name, cl->name);
      return NULL;
    }
  }

  switch (target_field) {
  case KeywordField:
    if (cl->keywords == NULL) {
      cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
      return NULL;
    }
    break;
    
  case TargetField:
    if (cl->targets == NULL) {
      cqpmessage(Error, "No target anchors defined for %s", cl->name);
      return NULL;
    }
    break;
    
  case MatchField:
  case MatchEndField:
    assert(cl->range && cl->size > 0);
    break;
    
  case NoField:
  default:
    cqpmessage(Error, "Illegal anchor in group command");
    return NULL;
    break;
  }

  /* set up Group object */
  group = (Group *) cl_malloc(sizeof(Group));
  group->my_corpus = cl;
  group->source_attribute = source_attr;
  group->source_offset = source_offset;
  group->source_is_struc = source_is_struc;
  group->source_base = source_base;
  group->source_field = source_field;
  group->target_attribute = target_attr;
  group->target_offset = target_offset;
  group->target_is_struc = target_is_struc;
  group->target_base = target_base;
  group->target_field = target_field;
  group->nr_cells = 0;
  group->count_cells = NULL;
  group->cutoff_frequency = cutoff_freq;

  if (UseExternalGrouping && !insecure && !(source_is_struc || target_is_struc))
    return ComputeGroupExternally(group); /* modifies Group object in place and returns pointer or NULL */
  else
    return ComputeGroupInternally(group);
}
Пример #8
0
Файл: groups.c Проект: cran/rcqp
/**
 * Computes the cells of a grouping with the help of an external sort command.
 *
 * One "source-ID target-ID" line per match is written to a temporary file;
 * ExternalGroupingCommand is then formatted with the file name and run
 * through popen(). Its output is read as "freq source-ID target-ID" triples,
 * and every triple whose frequency exceeds the cutoff is appended to
 * group->count_cells (grown in steps of GROUP_REALLOC entries).
 *
 * The temporary file is removed afterwards unless GROUP_DEBUG is set.
 *
 * @param group  The Group object to fill in; modified in place.
 * @return       Always the group pointer passed in; on failure (temporary
 *               file, command construction or pipe) a warning is issued and
 *               the group is returned without cells having been added.
 */
Group *
ComputeGroupExternally(Group *group)
{
  int i;
  int size = group->my_corpus->size;
  int cutoff_freq = group->cutoff_frequency;
  int call_length;

  char temporary_name[TEMP_FILENAME_BUFSIZE];
  FILE *fd;
  FILE *pipe = NULL;
  char sort_call[CL_MAX_LINE_LENGTH];

  /* ---------------------------------------------------------------------- */

  if ((fd = open_temporary_file(temporary_name)) == NULL) {
    perror("Error while opening temporary file");
    cqpmessage(Warning, "Can't open temporary file");
    return group;
  }

  for (i = 0; i < size; i++) {
    fprintf(fd, "%d %d\n", get_group_id(group, i, 0), get_group_id(group, i, 1)); /* (source ID, target ID) */
  }
  fclose(fd);

  /* construct sort call; the format comes from a global setting (presumably
   * containing a single %s for the file name), so the write must be bounded */
  call_length = snprintf(sort_call, sizeof(sort_call), ExternalGroupingCommand, temporary_name);
  if (call_length < 0 || (size_t) call_length >= sizeof(sort_call)) {
    /* a truncated command must never be executed */
    cqpmessage(Warning, "Can't construct grouping command (command line too long).\n"
               "Disable external grouping by\n"
               "  set UseExternalGrouping off;");
  }
  else {
    if (GROUP_DEBUG)
     Rprintf( "Running grouping sort: \n\t%s\n",
              sort_call);
    if ((pipe = popen(sort_call, "r")) == NULL) {
      perror("Failure opening grouping pipe");
      cqpmessage(Warning, "Can't open grouping pipe:\n%s\n"
                 "Disable external grouping by\n"
                 "  set UseExternalGrouping off;", 
                 sort_call);
    }
  }

  if (pipe != NULL) {
    int freq, p1, p2, tokens;
#define GROUP_REALLOC 16

    while ((tokens = fscanf(pipe, "%d%d%d", &freq, &p1, &p2)) == 3) {
      if (freq > cutoff_freq) {
        /* the cell array is extended whenever a multiple of GROUP_REALLOC is reached */
        if ((group->nr_cells % GROUP_REALLOC) == 0) {
          if (group->count_cells == NULL) {
            group->count_cells = 
              (ID_Count_Mapping *)cl_malloc(GROUP_REALLOC *
                                         sizeof(ID_Count_Mapping));
          }
          else {
            group->count_cells = 
              (ID_Count_Mapping *)cl_realloc(group->count_cells,
                                          (group->nr_cells + GROUP_REALLOC) *
                                          sizeof(ID_Count_Mapping));
          }
          assert(group->count_cells);
        }

        group->count_cells[group->nr_cells].s = p1;
        group->count_cells[group->nr_cells].t = p2;
        group->count_cells[group->nr_cells].freq = freq;

        group->nr_cells = group->nr_cells + 1;
      }
    }

    /* the loop must have ended because of end-of-input, not a malformed line */
    if (tokens != EOF) {
     Rprintf( "Warning: could not reach EOF of temporary file!\n");
    }

    pclose(pipe);
  }

  if (GROUP_DEBUG) {
   Rprintf( "Keeping temporary file %s -- delete manually\n",
            temporary_name);
  }
  else if (unlink(temporary_name) != 0) {
    perror(temporary_name);
   Rprintf( "Can't remove temporary file %s -- \n\tI will continue, "
            "but you should remove that file.\n", temporary_name);
  }
  
  return group;
}
Пример #9
0
Файл: groups.c Проект: cran/rcqp
/**
 * Computes the cells of a grouping inside CQP.
 *
 * For every match the (source ID, target ID) pair is looked up through
 * binsert_g() in group->count_cells (presumably inserting it if absent --
 * confirm against binsert_g) and the frequency of the returned cell is
 * incremented. The loop can be interrupted by the user: it stops as soon as
 * EvaluationIsRunning has been cleared.
 *
 * On normal completion sum_freqs() determines group->nr_cells, and the cell
 * array is shrunk if fewer cells remain than were collected. After an
 * interrupt a warning is issued, the signal handler is re-installed (only
 * when running as cqp) and the group is released.
 *
 * @param group  The Group object to fill in.
 * @return       The group, or the value left in the pointer by free_group()
 *               after a user abort (NULL, according to the original comment).
 */
Group *
ComputeGroupInternally(Group *group)
{
  ID_Count_Mapping probe;
  ID_Count_Mapping *cell;

  size_t n_collected = 0;
  int n_matches = group->my_corpus->size;
  int shown_percentage = -1;    /* last value passed to the progress bar */
  int current_percentage;
  int match;

  /* ---------------------------------------------------------------------- */

  if (progress_bar)
    progress_bar_clear_line();

  EvaluationIsRunning = 1;

  /* the flag is cleared on user abort (Ctrl-C) */
  for (match = 0; match < n_matches && EvaluationIsRunning; match++) {

    if (progress_bar) {
      current_percentage = floor(0.5 + (100.0 * match) / n_matches);
      if (current_percentage > shown_percentage) {
        shown_percentage = current_percentage;
        progress_bar_percentage(1, 2, shown_percentage);
      }
    }

    probe.s = get_group_id(group, match, 0);    /* source ID */
    probe.t = get_group_id(group, match, 1);    /* target ID */
    probe.freq = 0;

    cell = binsert_g(&probe,
                     (void **) &(group->count_cells),
                     &n_collected,
                     sizeof(ID_Count_Mapping),
                     compare_st_cells);

    cell->freq++;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Group operation aborted by user.");
    if (which_app == cqp)
      install_signal_handler();
    free_group(&group);         /* sets return value to NULL to indicate failure */
  }
  else {
    group->nr_cells = sum_freqs(group->count_cells, n_collected, group->cutoff_frequency);

    if (progress_bar)
      progress_bar_clear_line();

    /* give back the memory of cells that are no longer counted */
    if (group->nr_cells < n_collected)
      group->count_cells =
        cl_realloc(group->count_cells, (group->nr_cells * sizeof(ID_Count_Mapping)));
  }
  EvaluationIsRunning = 0;

  return group;
}
Пример #10
0
/**
 * Filters a query result in place, keeping only the matches whose selected
 * anchor satisfies a constraint.
 *
 * For every match the corpus position of the_field is determined; if it is
 * undefined (negative, which includes NoField and unknown field codes) or
 * eval_bool() rejects it, the match range is overwritten with -1/-1.
 * If the user interrupts the evaluation, all matches that have not been
 * examined yet are marked in the same way, so the result is incomplete but
 * contains only correct matches. Finally RangeSetop(cl, RReduce, ...) is
 * called (presumably to drop the marked ranges -- confirm against RangeSetop).
 *
 * @param cl         The query result to filter; must be of type SUB or TEMP (asserted).
 * @param the_field  The anchor whose position is tested.
 * @param constr     The constraint to evaluate; must not be NULL (asserted).
 * @return           Always 1.
 */
int evaluate_subset(CorpusList *cl, /* the corpus */
                    FieldType the_field,       /* the field to scan */
                    Constrainttree constr)
{
  int line, position;
  int percentage, new_percentage; /* for ProgressBar */

  assert(cl && constr);
  assert(cl->type == SUB || cl->type == TEMP);

  percentage = -1;

  EvaluationIsRunning = 1;
  for (line = 0; (line < cl->size) && EvaluationIsRunning; line++) {

    if (progress_bar) {
      new_percentage = floor(0.5 + (100.0 * line) / cl->size);
      if (new_percentage > percentage) {
        percentage = new_percentage;
        progress_bar_percentage(0, 0, percentage);
      }
    }

    switch (the_field) {
    
    case MatchField:
      position = cl->range[line].start;
      break;
      
    case MatchEndField:
      position = cl->range[line].end;
      break;
      
    case KeywordField:
      assert(cl->keywords);
      position = cl->keywords[line];
      break;
      
    case TargetField:
      assert(cl->targets);
      position = cl->targets[line];
      break;
      
    case NoField:
    default:
      position = -1;            /* no usable anchor -> match is discarded below */
      break;
    }

    if (position < 0 || (!eval_bool(constr, NULL, position))) {
      cl->range[line].start = -1;
      cl->range[line].end   = -1;
    }
  }
  
  /* if interrupted, delete part of temporary query result which hasn't been filtered;
     so that the result is incomplete but at least contains only correct matches */
  while (line < cl->size) {
    cl->range[line].start = -1;
    cl->range[line].end   = -1;
    line++;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interrupted: results may be incomplete.");
    if (which_app == cqp) install_signal_handler();
  }
  EvaluationIsRunning = 0;

  if (progress_bar) 
    progress_bar_message(0, 0, "  cleaning up");

  (void) RangeSetop(cl, RReduce, NULL, NULL);

  return 1;
}
Пример #11
0
/**
 * Verifies the current context settings against the current corpus.
 *
 * Structural context settings are re-resolved: the structure name is looked
 * up as an s-attribute and, failing that, as an alignment attribute (in
 * which case the context type becomes ALIGN_CONTEXT and the width is forced
 * to 1, with a warning). Settings that cannot be resolved are reset with
 * RESET_LEFT_CONTEXT / RESET_RIGHT_CONTEXT. Negative widths are negated.
 * The four attribute lists of the descriptor are passed to VerifyList() and
 * destroyed if their list member is NULL afterwards.
 *
 * The string fields in CD are supposed to be malloced and freed.
 *
 * @param corpus                  The corpus to verify against.
 * @param cd                      The context descriptor to verify (modified in place).
 * @param remove_illegal_entries  Passed through to VerifyList().
 * @return  0 if cd or corpus is NULL, if a context had to be reset, or if a
 *          negative width was corrected; 1 otherwise. Note that the switch
 *          to ALIGN_CONTEXT and the VerifyList() calls do not affect the result.
 */
int
verify_context_descriptor(Corpus *corpus,
                          ContextDescriptor *cd,
                          int remove_illegal_entries)
{
  int result = 1;

  if (cd == NULL) {
    Rprintf( "verify_context_descriptor(): WARNING: Context Descriptor empty!\n");
    result = 0;
  }
  else if (corpus == NULL) {
    Rprintf( "verify_context_descriptor(): WARNING: Corpus Descriptor empty!\n");
    RESET_LEFT_CONTEXT;
    RESET_RIGHT_CONTEXT;
    cd->attributes = NULL;
    result = 0;
  }
  else {

    /* check left attribute */
    if (cd->left_type == STRUC_CONTEXT) {
      if (cd->left_structure_name == NULL) {
        RESET_LEFT_CONTEXT;
        result = 0;
      }
      else {
        /* find (structural) attribute */
        if ((cd->left_structure = find_attribute(corpus,
                                                 cd->left_structure_name,
                                                 ATT_STRUC, NULL))
            == NULL) {
          /* not defined -> try alignment attribute */
          if ((cd->left_structure = find_attribute(corpus,
                                                   cd->left_structure_name,
                                                   ATT_ALIGN, NULL))
              == NULL) {
            /* error -> reset to default context */
            RESET_LEFT_CONTEXT;
            result = 0;
          }
          else {
            /* alignment attribute found -> change context type to ALIGN_CONTEXT */
            cd->left_type = ALIGN_CONTEXT;
            if (cd->left_width != 1) {
              cqpmessage(Warning,
                         "Left Context '%d %s' changed to '1 %s' (alignment attribute).",
                         cd->left_width,
                         cd->left_structure_name,
                         cd->left_structure_name);
              cd->left_width = 1;
            }
          }
        }
      }
    }
    if (cd->left_width < 0) {
      Rprintf( "concordance.o/verify_context_descriptor: WARNING: lwidth < 0\n");
      cd->left_width = -cd->left_width;
      result = 0;
    }

    /* check right attribute */
    if (cd->right_type == STRUC_CONTEXT) {
      if (cd->right_structure_name == NULL) {
        RESET_RIGHT_CONTEXT;
        result = 0;
      }
      else {
        /* find (structural) attribute */
        if ((cd->right_structure = find_attribute(corpus,
                                                  cd->right_structure_name,
                                                  ATT_STRUC, NULL))
            == NULL) {
          /* not defined -> try alignment attribute */
          if ((cd->right_structure = find_attribute(corpus,
                                                    cd->right_structure_name,
                                                    ATT_ALIGN, NULL))
              == NULL) {
            /* error -> reset to default context */
            RESET_RIGHT_CONTEXT;
            result = 0;
          }
          else {
            /* alignment attribute found -> change context type to ALIGN_CONTEXT */
            cd->right_type = ALIGN_CONTEXT;
            if (cd->right_width != 1) {
              cqpmessage(Warning,
                         "Right Context '%d %s' changed to '1 %s' (alignment attribute).",
                         cd->right_width,
                         cd->right_structure_name,
                         cd->right_structure_name);
              cd->right_width = 1;
            }
          }
        }
      }
    }
    if (cd->right_width < 0) {
      /* this diagnostic concerns the right width, so it must say rwidth */
      Rprintf( "concordance.o/verify_context_descriptor: WARNING: rwidth < 0\n");
      cd->right_width = -cd->right_width;
      result = 0;
    }

    /* cd->print_cpos = 0; */
    
    VerifyList(cd->attributes, corpus, remove_illegal_entries);
    if (cd->attributes && cd->attributes->list == NULL)
      DestroyAttributeList(&(cd->attributes));

    VerifyList(cd->strucAttributes, corpus, remove_illegal_entries);
    if (cd->strucAttributes && cd->strucAttributes->list == NULL)
      DestroyAttributeList(&(cd->strucAttributes));

    VerifyList(cd->printStructureTags, corpus, remove_illegal_entries);
    if (cd->printStructureTags && cd->printStructureTags->list == NULL)
      DestroyAttributeList(&(cd->printStructureTags));

    VerifyList(cd->alignedCorpora, corpus, remove_illegal_entries);
    if (cd->alignedCorpora && cd->alignedCorpora->list == NULL)
      DestroyAttributeList(&(cd->alignedCorpora));
      
  }
  return result;
}
Пример #12
0
/**
 * Modifies the items of a CQP variable, creating the variable if it does
 * not exist yet.
 *
 * Supported operators:
 *   '+'  add the whitespace-separated items of varValues;
 *   '-'  remove the whitespace-separated items of varValues;
 *   '='  delete all items, then add the items of varValues;
 *   '<'  delete all items, then add every non-empty line of the file
 *        named by varValues (the trailing newline is stripped).
 *
 * varValues is tokenised with strtok() for '+', '-' and '=', i.e. the
 * string is modified.
 *
 * @param varName    Name of the variable.
 * @param operator   One of '+', '-', '=', '<'.
 * @param varValues  Item list or file name, depending on the operator.
 * @return           1 on success; 0 if the variable cannot be created, the
 *                   file cannot be opened, or the operator is unknown.
 */
int
SetVariableValue(char *varName, 
                 char operator,
                 char *varValues)
{
  Variable v;
  char *token;
  FILE *src;

  v = FindVariable(varName);
  if (v == NULL) {
    v = NewVariable(varName);
    if (v == NULL) {
      cqpmessage(Error, "Out of memory.");
      return 0;
    }
  }

  switch (operator) {

  case '=':                        /* = operator: absolute setting */
    VariableDeleteItems(v);
    /* fall through: the new items are added exactly as for += */

  case '+':                        /* += operator: extend */
    for (token = strtok(varValues, " \t\n"); token; token = strtok(NULL, " \t\n"))
      VariableAddItem(v, token);
    break;

  case '-':                        /* -= operator: subtract */
    for (token = strtok(varValues, " \t\n"); token; token = strtok(NULL, " \t\n"))
      VariableSubtractItem(v, token);
    break;

  case '<':                        /* < operator: read from file */
    VariableDeleteItems(v);

    src = open_file(varValues, "r");
    if (src == NULL) {
      perror(varValues);
      cqpmessage(Warning, "Can't open %s: no such file or directory",
                 varValues);
      return 0;
    }
    {
      char line[CL_MAX_LINE_LENGTH];
      int length;

      while (fgets(line, CL_MAX_LINE_LENGTH, src) != NULL) {
        length = strlen(line);

        /* strip trailing newline */
        if (length > 0 && line[length - 1] == '\n')
          line[--length] = '\0';

        /* empty lines do not become items */
        if (length > 0)
          VariableAddItem(v, line);
      }
      fclose(src);
    }
    break;

  default:
    return 0;
  }

  return 1;
}
Пример #13
0
/**
 * Prints one concordance line of a query result in ASCII format.
 *
 * The line is composed by compose_kwic_line() using the global context
 * descriptor CD and the global left/right delimiters, written to outfd and
 * followed by the AfterLine string of the print description record (if any).
 * If CD lists aligned corpora, printAlignedStrings() is called afterwards.
 *
 * @param outfd               Stream to print to.
 * @param cl                  The query result.
 * @param element             Index of the match to print (0 .. cl->size - 1).
 * @param apply_highlighting  Non-zero selects the highlighted print description record.
 * @param strucs              Not used by this function.
 */
void 
print_concordance_line(FILE *outfd,
                       CorpusList *cl,
                       int element,
                       int apply_highlighting,
                       AttributeList *strucs)
{
  ConcLineField clf[NoField];        /* NoField is largest field code (not used by us) */
  PrintDescriptionRecord *pdr;
  char *line;
  int line_length, match_begin_in_line, match_end_in_line;
  int keyword_cpos, target_cpos;

  if (!cl || !outfd) {
    cqpmessage(Error, "Empty corpus or empty output file");
    return;
  }

  if (element < 0 || element >= cl->size) {
    cqpmessage(Error, "Illegal element in print_concordance_line");
    return;
  }

  if (escapes_initialized == 0)
    get_screen_escapes();

  /* reset display flags */
  sc_s_mode = 0;
  sc_u_mode = 0;
  sc_b_mode = 0;

  /* ---------------------------------------- concordance fields */

  /* keyword and target are single positions; -1 marks an undefined anchor */
  keyword_cpos = cl->keywords ? cl->keywords[element] : -1;
  target_cpos  = cl->targets  ? cl->targets[element]  : -1;

  clf[MatchField].type = MatchField;
  clf[MatchField].start_position = cl->range[element].start;
  clf[MatchField].end_position = cl->range[element].end;

  /* unused, because we use MatchField for the entire match */
  clf[MatchEndField].type = MatchEndField;
  clf[MatchEndField].start_position = -1;
  clf[MatchEndField].end_position = -1;

  clf[KeywordField].type = KeywordField;
  clf[KeywordField].start_position = keyword_cpos;
  clf[KeywordField].end_position = keyword_cpos;

  clf[TargetField].type = TargetField;
  clf[TargetField].start_position = target_cpos;
  clf[TargetField].end_position = target_cpos;

  pdr = apply_highlighting ? &ASCIIHighlightedPrintDescriptionRecord
                           : &ASCIIPrintDescriptionRecord;

  line = compose_kwic_line(cl->corpus,
                           cl->range[element].start, cl->range[element].end,
                           &CD,
                           &line_length,
                           &match_begin_in_line, &match_end_in_line,
                           left_delimiter, right_delimiter,
                           NULL, 0, NULL,
                           clf, NoField, /* NoField = # of entries in clf[] */
                           ConcLineHorizontal,
                           pdr,
                           0, NULL);

  fputs(line, outfd);
  free(line);

  if (pdr->AfterLine)
    fputs(pdr->AfterLine, outfd);

  if (CD.alignedCorpora != NULL)
    printAlignedStrings(cl->corpus,
                        &CD,
                        cl->range[element].start, cl->range[element].end,
                        apply_highlighting,
                        outfd);
}
Пример #14
0
/* Tabulates the specified query result, using the settings from the global
   list of tabulation items (TabulationList).

   For every match in first..last (clamped to the size of the query result)
   one output line is written; tabulation items are separated by TABs, and
   the values for the corpus positions of one item's range by blanks. An item
   without attribute prints the corpus position itself; undefined anchors
   print -1 or, for attribute values, an empty string.

   The return value indicates whether tabulation was successful (otherwise an
   error message has been generated). The tabulation list is released only
   after successful output. */
int
print_tabulation(CorpusList *cl, int first, int last, struct Redir *rd)
{
  TabulationItem item;
  int match;

  if (cl == NULL)
    return 0;

  /* make sure that first and last match to tabulate are in range */
  if (first <= 0)
    first = 0;
  if (last >= cl->size)
    last = cl->size - 1;

  /* obtain attribute handles for tabulation items */
  for (item = TabulationList; item; item = item->next) {
    if (!item->attribute_name) {
      item->attribute_type = ATT_NONE; /* no attribute -> print corpus position */
    }
    else if ((item->attribute = cl_new_attribute(cl->corpus, item->attribute_name, ATT_POS)) != NULL) {
      item->attribute_type = ATT_POS;
    }
    else if ((item->attribute = cl_new_attribute(cl->corpus, item->attribute_name, ATT_STRUC)) != NULL) {
      item->attribute_type = ATT_STRUC;
      if (!cl_struc_values(item->attribute)) {
        cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s", item->attribute_name, cl->name);
        return 0;
      }
    }
    else {
      cqpmessage(Error, "Can't find attribute ``%s'' for named query %s", item->attribute_name, cl->name);
      return 0;
    }

    /* work around bug: anchor validation will fail for empty query result (but then loop below is void anyway) */
    if (cl->size > 0
        && !(pt_validate_anchor(cl, item->anchor1) && pt_validate_anchor(cl, item->anchor2)))
      return 0;
  }

  if (!open_stream(rd, cl->corpus->charset)) {
    cqpmessage(Error, "Can't redirect output to file or pipe\n");
    return 0;
  }

  /* tabulate selected attribute values for matches <first> .. <last> */
  for (match = first; match <= last; match++) {
    for (item = TabulationList; item; item = item->next) {
      int range_start = pt_get_anchor_cpos(cl, match, item->anchor1, item->offset1);
      int range_end   = pt_get_anchor_cpos(cl, match, item->anchor2, item->offset2);
      int cpos;

      /* one of the anchors is undefined -> print single undefined value for entire range */
      if (range_start < 0 || range_end < 0)
        range_start = range_end = -1;

      for (cpos = range_start; cpos <= range_end; cpos++) {
        if (item->attribute_type == ATT_NONE) {
          fprintf(rd->stream, "%d", cpos);
        }
        else if (cpos >= 0) {     /* undefined anchors print empty string */
          char *value = NULL;

          if (item->attribute_type == ATT_POS)
            value = cl_cpos2str(item->attribute, cpos);
          else
            value = cl_cpos2struc2str(item->attribute, cpos);

          if (value && item->flags) {
            /* normalise a private copy, never the string owned by the attribute */
            char *normalised = cl_strdup(value);
            cl_string_canonical(normalised, cl->corpus->charset, item->flags);
            fprintf(rd->stream, "%s", normalised);
            cl_free(normalised);
          }
          else if (value) {
            fprintf(rd->stream, "%s", value);
          }
        }

        /* multiple values for tabulation item are separated by blanks */
        if (cpos < range_end)
          fprintf(rd->stream, " ");
      }

      /* multiple tabulation items are separated by TABs */
      if (item->next)
        fprintf(rd->stream, "\t");
    }
    fprintf(rd->stream, "\n");
  }

  close_stream(rd);
  free_tabulation_list();
  return 1;
}
Пример #15
0
/**
 * Outputs a blob of information on the mother-corpus of the specified cl.
 *
 * For a system corpus the size, the charset, all corpus properties and the
 * contents of the info file (if there is one) are printed through the
 * stream opened by open_stream(); if that stream cannot be opened, output
 * goes to stdout instead. For any other corpus the function is applied to
 * the mother corpus; if that is not loaded (or no mother name is recorded),
 * only a message is issued.
 *
 * @param cl  The corpus or subcorpus to report on.
 */
void 
corpus_info(CorpusList *cl)
{
  FILE *fd;
  FILE *outfd;
  char buf[CL_MAX_LINE_LENGTH];
  size_t n_read;
  int stream_ok;
  struct Redir rd = { NULL, NULL, NULL, 0, 0 }; /* for paging (with open_stream()) */

  CorpusList *mom = NULL;
  CorpusProperty p;

  /* first, the case where cl is actually a full corpus */
  if (cl->type == SYSTEM) {

    stream_ok = open_stream(&rd, ascii);
    /* use pager, or simply print to stdout if it fails; the stream handed to
     * fprintf()/fwrite() below must never be NULL */
    outfd = (stream_ok) ? rd.stream : stdout;
    /* print size (should be the mother_size entry) */
    fprintf(outfd, "Size:    %d\n", cl->mother_size);
    /* print charset */
    fprintf(outfd, "Charset: ");

    if (cl->corpus->charset == unknown_charset) {
      fprintf(outfd, "<unsupported> (%s)\n", cl_corpus_property(cl->corpus, "charset"));
    }
    else {
      fprintf(outfd, "%s\n", cl_charset_name(cl->corpus->charset));
    }
    /* print properties */
    fprintf(outfd, "Properties:\n");
    p = cl_first_corpus_property(cl->corpus);
    if (p == NULL)
      fprintf(outfd, "\t<none>\n");
    else 
      for ( ; p != NULL; p = cl_next_corpus_property(p))
        fprintf(outfd, "\t%s = '%s'\n", p->property, p->value);
    fprintf(outfd, "\n");
    

    if (cl->corpus->info_file == NULL)
      fprintf(outfd, "No further information available about %s\n", cl->name);
    else if ((fd = open_file(cl->corpus->info_file, "rb")) == NULL)
      cqpmessage(Warning,
                 "Can't open info file %s for reading",
                 cl->corpus->info_file);
    else {
      /* copy the info file verbatim; stop on a short read (end of file or
       * read error) or as soon as the output stream refuses data */
      do {
        n_read = fread(&buf[0], sizeof(char), CL_MAX_LINE_LENGTH, fd);
        if (fwrite(&buf[0], sizeof(char), n_read, outfd) != n_read)
          break;
      } while (n_read == CL_MAX_LINE_LENGTH);
      fclose(fd);
    }

    if (stream_ok) 
      close_stream(&rd);        /* close pipe to pager if we were using it */
  }
  /* if cl is not actually a full corpus, try to find its mother and call this function on that */
  else if (cl->mother_name == NULL)
    cqpmessage(Warning, 
               "Corrupt corpus information for %s", cl->name);
  else if ((mom = findcorpus(cl->mother_name, SYSTEM, 0)) != NULL) {
    corpus_info(mom);
  }
  /* if the mother is not loaded, we just have to print an error */
  else {
    cqpmessage(Info,
               "%s is a subcorpus of %s which is not loaded. Try 'info %s' "
               "for information about %s.\n",
               cl->name, cl->mother_name, cl->mother_name, cl->mother_name);
  }
}
Пример #16
0
/**
 * Prints a corpus, typically (some of) the matches of a query.
 *
 * (Not sure why it's called "catalog"; is this a pun on the cat keyword? -- AH 2012-07-17)
 *
 * The query is represented by a subcorpus (cl); only results
 * #first..#last; will be printed; use (0,-1) for entire corpus.
 *
 * If the global rangeoutput flag is set or mode is PrintBINARY, the start
 * and end position of every match are written as raw ints. Otherwise the
 * global context descriptor CD is verified against the corpus, an optional
 * header (or the number of matches) is printed, and print_output() produces
 * the concordance in the requested mode.
 *
 * @param cl     The corpus/subcorpus/query to output.
 * @param rd     Block of output redirection info; if NULL, default settings will be used.
 * @param first  Offset of first match to print.
 * @param last   Offset of last match to print.
 * @param mode   Print mode to use.
 */
void 
catalog_corpus(CorpusList *cl,
               struct Redir *rd,
               int first, int last,
               PrintMode mode)
{
  int i;
  Boolean printHeader = False;

  struct Redir default_redir;

  if ((cl == NULL) || (!access_corpus(cl)))
    return;

  if (!rd) {
    default_redir.name = NULL;
    default_redir.mode = "w";
    default_redir.stream = NULL;
    default_redir.is_pipe = 0;
    /* is_paging is read further down, so it must not be left uninitialised
     * in case open_stream() does not assign it */
    default_redir.is_paging = 0;
    rd = &default_redir;
  }

  if (!open_stream(rd, cl->corpus->charset)) {
    cqpmessage(Error, "Can't open output stream.");
    return;
  }

  assert(rd->stream);

  /* ======================================== BINARY OUTPUT */

  if (rangeoutput || mode == PrintBINARY) {

    /* NOTE(review): all matches are written here; first/last are not applied */
    for (i = 0; (i < cl->size); i++) {
      fwrite(&(cl->range[i].start), sizeof(int), 1, rd->stream);
      fwrite(&(cl->range[i].end), sizeof(int), 1, rd->stream);
    }

  }
  else {

    /* ====================================== ASCII, SGML OR HTML OUTPUT */

/*     if (CD.printStructureTags == NULL) */
/*       CD.printStructureTags = ComputePrintStructures(cl); */
    /* now done for current_corpus in options.c ! */

    printHeader = GlobalPrintOptions.print_header;

    /* questionable... (tests the global print mode, not the mode argument) */
    if (GlobalPrintMode == PrintHTML)
      printHeader = True;

#ifndef __MINGW__
    if (rd->is_pipe && handle_sigpipe) {
      if (signal(SIGPIPE, bp_signal_handler) == SIG_ERR)
        perror("Can't install signal handler for broken pipe (ignored)");
    }
#endif

    /* do the job. */
    
    verify_context_descriptor(cl->corpus, &CD, 1);
    
    broken_pipe = 0;

    /* first version (Oli Christ):
       if ((!silent || printHeader) && !(rd->stream == stdout || rd->is_paging));
       */
    /* second version (Stefan Evert):     
       if (printHeader || (mode == PrintASCII && !(rd->stream == stdout || rd->is_paging))); 
    */

    /* header is printed _only_ when explicitly requested now (or, when in HTML mode; see above);
     * previous behaviour was to print header automatically when saving results to a file;
     * this makes sense when such files are created to document the results of a corpus search,
     * but nowadays they are mostly used for automatic post-processing (e.g. in a Web interface),
     * where the header is just a nuisance that has to be stripped.
     */
    if (printHeader) {
      /* print something like a header */
      print_corpus_info_header(cl, rd->stream, mode, 1);
    }
    else if (printNrMatches && mode == PrintASCII)
      fprintf(rd->stream, "%d matches.\n", cl->size);
    
    print_output(cl, rd->stream, 
                 isatty(fileno(rd->stream)) || rd->is_paging, 
                 &CD, first, last, mode);

#ifndef __MINGW__
    /* NOTE(review): the handler is installed when rd->is_pipe is set but
     * restored when rd->is_paging is set -- confirm the asymmetry is intended */
    if (rd->is_paging && handle_sigpipe) {
      if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
        perror("Can't reinstall SIG_IGN signal handler");
    }
#endif
    
  }

  close_stream(rd);
}
Пример #17
0
/**
 * Sets one anchor field of every match of a named query by searching a
 * context window around a base anchor for a corpus position that satisfies
 * a constraint (the 'set target' command, going by the error messages).
 *
 * For each match a search window [lbound, rbound] is computed with
 * calculate_ranges() around the base anchor; it is then cut down to one side
 * for direction left / right, and the positions in it are tested with
 * eval_bool() in the order given by the search strategy. Unless inclusive is
 * set, the positions excl_start..excl_end (the base anchor, or the whole
 * match when base is MatchField) are skipped. The first hit is stored; lines
 * without a hit get -1.
 *
 * When t_id is MatchField or MatchEndField, the match ranges are updated in
 * place (clamped so that start <= end) and the corpus is re-sorted; for
 * TargetField / KeywordField the old anchor table is replaced.
 *
 * The loop stops early when EvaluationIsRunning is cleared (presumably by the
 * interrupt handling behind CheckForInterrupts() -- not visible here); in that
 * case a warning is issued and the remaining lines are left without a hit.
 *
 * @return 1 on success (including an interrupted run), 0 on error.
 */
int evaluate_target(CorpusList *corp,          /* the corpus */
                    FieldType t_id,            /* the field to set */
                    FieldType base,            /* where to start the search */
                    int inclusive,             /* including or excluding the base */
                    SearchStrategy strategy,   /* disambiguation rule: which item */
                    Constrainttree constr,     /* the constraint */
                    enum ctxtdir direction,    /* context direction */
                    int units,                       /* number of units */
                    char *attr_name)           /* name of unit */
{
  Attribute *attr;
  int *table;
  Context context;
  int i, line, lbound, rbound;
  int excl_start, excl_end;
  int nr_evals;
  int percentage, new_percentage; /* for ProgressBar */

  /* ------------------------------------------------------------ */

  assert(corp);

  /* consistency check */
  assert(t_id == TargetField || t_id == KeywordField || t_id == MatchField || t_id == MatchEndField);

  if (!constr) {
    cqpmessage(Error, "Constraing pattern missing in 'set target' command.");
    return 0;
  }

  if (corp->size <= 0) {
    cqpmessage(Error, "Corpus is empty.");
    return 0;
  }

  /*
   * check whether the base field specification is ok
   */
  switch(base) {
  case MatchField:
  case MatchEndField:
    if (corp->range == NULL) {
      cqpmessage(Error, "No ranges for start of search");
      return 0;
    }
    break;
  case TargetField:
    if (corp->targets == NULL) {
      cqpmessage(Error, "Can't start from base TARGET, none defined");
      return 0;
    }
    break;
  case KeywordField:
    if (corp->keywords == NULL) {
      cqpmessage(Error, "Can't start from base KEYWORD, none defined");
      return 0;
    }
    break;
  default:
    cqpmessage(Error, "Illegal base field (#%d) in 'set target' command.",
               base);
    return 0;
  }

  if (units <= 0) {
    cqpmessage(Error, "Invalid search space (%d units) in 'set target' command.", 
               units);
    return 0;
  }

  /* access_corpus(corp) is deliberately not checked here: the grammar is
   * expected to make sure the subcorpus exists and is loaded */

  context.size = units;
  context.direction = direction;

  /* the window is measured either in tokens ("word"/"words") or in regions
   * of the s-attribute named by attr_name */
  if ((strcasecmp(attr_name, "word") == 0) ||
      (strcasecmp(attr_name, "words") == 0)) {
    attr = find_attribute(corp->corpus, DEFAULT_ATT_NAME, ATT_POS, NULL);
    context.type = word;
    context.attrib = NULL;
  }
  else {
    attr = find_attribute(corp->corpus, attr_name, ATT_STRUC, NULL);
    context.type = structure;
    context.attrib = attr;
  }

  if (attr == NULL) {
    cqpmessage(Error, "Can't find attribute %s.%s",
               corp->mother_name, attr_name);
    return 0;
  }

  if (progress_bar) {
    progress_bar_clear_line();
    progress_bar_message(1, 1, "    preparing");
  }


  table = (int *)cl_calloc(corp->size, sizeof(int));

  /* Mark every line as "no hit" before the search starts. The table comes
   * back zero-filled, and 0 is a valid corpus position: if the loop below is
   * interrupted, lines it never reached must not be mistaken for hits at
   * position 0 by the clean-up code. */
  for (i = 0; i < corp->size; i++)
    table[i] = -1;

  EvaluationIsRunning = 1;
  nr_evals = 0;
  percentage = -1;

  for (line = 0; line < corp->size && EvaluationIsRunning; line++) {

    if (progress_bar) {
      new_percentage = floor(0.5 + (100.0 * line) / corp->size);
      if (new_percentage > percentage) {
        percentage = new_percentage;
        progress_bar_percentage(0, 0, percentage);
      }
    }

    /* ---- step 1: search window and excluded region for this line ---- */
    switch(base) {
    case MatchField:

      excl_start = corp->range[line].start;
      excl_end   = corp->range[line].end;

      if ((corp->range[line].start == corp->range[line].end) || inclusive) {

        /* single-token match, or base included: window around match start */
        if (calculate_ranges(corp,
                             corp->range[line].start, context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute boundaries for range #%d", line);
          lbound = rbound = -1;
        }
      }
      else {

        /* left boundary from the match start, right boundary from the match end */
        int dummy;

        if (calculate_ranges(corp,
                             corp->range[line].start, context,
                             &lbound, &dummy) == False) {

          Rprintf( "Can't compute left search space boundary match #%d", line);
          lbound = rbound = -1;
        }
        else if (calculate_ranges(corp,
                                  corp->range[line].end, context,
                                  &dummy, &rbound) == False) {

          Rprintf( "Can't compute right search space boundary match #%d", line);
          lbound = rbound = -1;
        }
      }
      break;

    case MatchEndField:
      excl_start = excl_end = corp->range[line].end;

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->range[line].end, context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;

    case TargetField:
      excl_start = excl_end = corp->targets[line];

      /* a negative anchor means "not set" for this line: nothing to search */
      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->targets[line], context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;

    case KeywordField:
      excl_start = excl_end = corp->keywords[line];

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->keywords[line], context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;
    default:
      /* base was validated above, so this cannot happen; clean up anyway */
      assert(0 && "Can't be");
      cl_free(table);
      EvaluationIsRunning = 0;
      return 0;
    }

    /* ---- step 2: scan the window in the order given by the strategy ---- */
    if ((lbound >= 0) && (rbound >= 0)) {
      
      int dist, maxdist;

      /* A one-sided window ends at the base anchor, and nearest/farthest
       * reduce to rightmost/leftmost scans. The remapped strategy is kept
       * for the following lines, which is harmless: direction never changes,
       * so the mapping is the same for every line. */
      if (direction == left) {
        rbound = excl_start;
        if (strategy == SearchNearest)
          strategy = SearchRightmost;
        else if (strategy == SearchFarthest)
          strategy = SearchLeftmost;
      }
      else if (direction == right) {
        lbound = excl_start;
        if (strategy == SearchNearest)
          strategy = SearchLeftmost;
        else if (strategy == SearchFarthest)
          strategy = SearchRightmost;
      }

      switch (strategy) {
      case SearchFarthest:

        maxdist = MAX(excl_start - lbound, rbound - excl_start);

        assert(maxdist >= 0);

        /* from the outside in; at equal distance the left side is tried first */
        for (dist = maxdist; dist >= 0; dist--) {

          i = excl_start - dist;

          if (i >= lbound &&
              (inclusive || (i < excl_start)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          i = excl_start + dist;

          if (i <= rbound &&
              (inclusive || (i > excl_end)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          /* poll for user interrupts every 1000 steps */
          nr_evals++;
          if (nr_evals == 1000) {
            CheckForInterrupts();
            nr_evals = 0;
          }

        }
        break;

      case SearchNearest:

        maxdist = MAX(excl_start - lbound, rbound - excl_start);
        assert(maxdist >= 0);

        /* from the inside out; at equal distance the left side is tried first */
        for (dist = 0; dist <= maxdist; dist++) {

          i = excl_start - dist;

          if (i >= lbound &&
              (inclusive || (i < excl_start)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          i = excl_start + dist;

          if (i <= rbound &&
              (inclusive || (i > excl_end)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          nr_evals++;
          if (nr_evals == 1000) {
            CheckForInterrupts();
            nr_evals = 0;
          }

        }
        break;

      case SearchLeftmost:
        for (i = lbound; i <= rbound; i++)
          if (inclusive || (i < excl_start) || (i > excl_end)) {
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

            nr_evals++;
            if (nr_evals == 1000) {
              CheckForInterrupts();
              nr_evals = 0;
            }
          }
        break;

      case SearchRightmost:
        for (i = rbound; i >= lbound; i--)
          if (inclusive || (i < excl_start) || (i > excl_end)) {
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

            nr_evals++;
            if (nr_evals == 1000) {
              CheckForInterrupts();
              nr_evals = 0;
            }
          }
        break;
      default:
        break;
      }
    }
  }

  if (progress_bar) 
    progress_bar_message(1, 1, "  cleaning up");

  /* ---- step 3: store the hits in the requested field ---- */
  switch (t_id) {
  case MatchField:
    for (i = 0; i < corp->size; i++) {
      if (table[i] >= 0) 
        corp->range[i].start = table[i];
      /* never let the match start move past the match end */
      if (corp->range[i].start > corp->range[i].end)
        corp->range[i].start = corp->range[i].end;
    }
    cl_free(table);
    break;

  case MatchEndField:
    for (i = 0; i < corp->size; i++) {
      if (table[i] >= 0) 
        corp->range[i].end = table[i];
      if (corp->range[i].end < corp->range[i].start)
        corp->range[i].end = corp->range[i].start;
    }
    cl_free(table);
    break;

  case TargetField:
    /* the corpus takes ownership of the table */
    cl_free(corp->targets);
    corp->targets = table;
    break;

  case KeywordField:
    cl_free(corp->keywords);
    corp->keywords = table;
    break;

  default:
    assert(0 && "Can't be");
    break;
  }

  if (progress_bar)
    progress_bar_clear_line();

  if ((t_id == MatchField) || (t_id == MatchEndField))
    RangeSort(corp, 0);                /* re-sort corpus if match regions were modified */

  touch_corpus(corp);  
  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interruted: results may be incomplete.");
    if (which_app == cqp) install_signal_handler();
  }
  EvaluationIsRunning = 0;

  return 1;
}
Пример #18
0
/**
 * Carries out any "side effects" of setting an option.
 *
 * The side effect is selected by the numeric side_effect code stored in the
 * option's cqpoptions[] entry. For the string-valued options PrintMode and
 * MatchingStrategy, an unrecognised value produces a usage error and both the
 * internal setting and the option string are reset to the default
 * ("ascii" / "standard").
 *
 * @param opt  The option that has just been set (index into the cqpoptions array).
 *
 * TODO This use of integer indexes as the pass from parse_options is very messy....
 */
void
execute_side_effects(int opt)
{
  switch (cqpoptions[opt].side_effect) {
  case 0:  /* <no side effect> */
    break;
  case 1:  /* set Registry "..."; */
    check_available_corpora(SYSTEM);
    break;
  case 2:  /* set DataDirectory "..."; */
    check_available_corpora(SUB);
    break;
  case 3:  /* set Optimize (on | off); */
    cl_set_optimize(query_optimize); /* enable / disable CL optimisations, too */
    break;
  case 4:  /* set CLDebug (on | off); */
    cl_set_debug_level(activate_cl_debug); /* enable / disable CL debugging */
    break;
    
    /* slot 5 is free */

  case 6:  /* set PrintMode (ascii | sgml | html | latex); */
    /* an unset mode string counts as "ascii" */
    if (printModeString == NULL || strcasecmp(printModeString, "ascii") == 0)
      GlobalPrintMode = PrintASCII;
    else if (strcasecmp(printModeString, "sgml") == 0)
      GlobalPrintMode = PrintSGML;
    else if (strcasecmp(printModeString, "html") == 0)
      GlobalPrintMode = PrintHTML;
    else if (strcasecmp(printModeString, "latex") == 0)
      GlobalPrintMode = PrintLATEX;
    else {
      cqpmessage(Error, "USAGE: set PrintMode (ascii | sgml | html | latex);");
      GlobalPrintMode = PrintASCII;
      cl_free(printModeString);
      printModeString = cl_strdup("ascii");
    }
    break;

  case 7:  /* set PrintStructures "..."; */
    /* throw away the old attribute list before computing the new one */
    if (CD.printStructureTags) {
      DestroyAttributeList(&CD.printStructureTags);
    }
    CD.printStructureTags = ComputePrintStructures(current_corpus);
    break;

  case 8:  /* set PrintOptions "...."; */
    ParsePrintOptions();
    break;

  case 9:  /* set MatchingStrategy ( traditional | shortest | standard | longest ); */
    if (strcasecmp(matching_strategy_name, "traditional") == 0) {
      matching_strategy = traditional;
    }
    else if (strcasecmp(matching_strategy_name, "shortest") == 0) {
      matching_strategy = shortest_match;
    }
    else if (strcasecmp(matching_strategy_name, "standard") == 0) {
      matching_strategy = standard_match;
    }
    else if (strcasecmp(matching_strategy_name, "longest") == 0) {
      matching_strategy = longest_match;
    }
    else {
      cqpmessage(Error, "USAGE: set MatchingStrategy (traditional | shortest | standard | longest);");
      matching_strategy = standard_match;
      cl_free(matching_strategy_name);
      /* use the CL allocator, as the PrintMode fallback above does, so the
       * string is allocated by the same family that later cl_free()s it */
      matching_strategy_name = cl_strdup("standard");
    }
    break;
    
  default:
    fprintf(stderr, "Unknown side-effect #%d invoked by option %s.\n", 
            cqpoptions[opt].side_effect, cqpoptions[opt].opt_name);
    assert(0 && "Aborted. Please contact technical support.");
  }
}
Пример #19
0
/**
 * Copies one anchor field of a named query onto another, or deletes a field.
 *
 * The target can be any field except NoField (-> CQP dies on the assertion).
 * The source can be NoField, which deletes the target field; this is only
 * allowed for target and keyword, not for match or matchend.
 *
 * When match or matchend is overwritten from keyword/target, lines whose
 * source anchor is negative keep their old value, and the result is clamped
 * so that start <= end. After a modification of match/matchend the corpus is
 * re-sorted.
 *
 * @param corp  The named query to modify.
 * @param t_id  The field to set (or delete).
 * @param s_id  The field to copy from, or NoField to delete t_id.
 * @return      1 on success; 0 if an error was reported (identical fields,
 *              empty corpus, attempt to delete match/matchend, or source
 *              field not defined). The corpus is not re-sorted or touched
 *              in the error case.
 */
int
set_target(CorpusList *corp, FieldType t_id, FieldType s_id)
{
  int i;

  if (t_id == s_id) {
    cqpmessage(Error, "Fields are identical.");
    return 0;
  }

  if (corp->size == 0) {
    cqpmessage(Error, "Corpus is empty, nothing to be done.");
    return 0;
  }
  assert(corp->range);


  switch (s_id) {
  case NoField:
    /* delete the target field */
    switch (t_id) {
    case MatchField:
    case MatchEndField:
      cqpmessage(Error, "Can't delete match or matchend field from %s\n", corp->name);
      return 0;         /* an error was reported, so don't claim success */

    case TargetField:
      cl_free(corp->targets);
      break;

    case KeywordField:
      cl_free(corp->keywords);
      break;
      
    case NoField:
    default:
      assert(0 && "Can't be");
      break;
    }
    break;

  case KeywordField:
    if (corp->keywords == NULL) {
      cqpmessage(Error, "No keyword defined for %s\n", corp->name);
      return 0;
    }

    switch (t_id) {
    case MatchField:
      /* still checked at run time in case assertions are compiled out */
      if (corp->range == NULL) {
        cqpmessage(Error, "Internal error: match ranges not allocated. Abort.");
        return 0;
      }

      for (i = 0; i < corp->size; i++) {
        if (corp->keywords[i] >= 0)
          corp->range[i].start = corp->keywords[i];
        if (corp->range[i].start > corp->range[i].end)
          corp->range[i].start = corp->range[i].end;
      }
      break;

    case MatchEndField:
      if (corp->range == NULL) {
        cqpmessage(Error, "Internal error: match ranges not allocated. Abort.");
        return 0;
      }

      for (i = 0; i < corp->size; i++) {
        if (corp->keywords[i] >= 0)
          corp->range[i].end   = corp->keywords[i];
        if (corp->range[i].end < corp->range[i].start)
          corp->range[i].end = corp->range[i].start;
      }
      break;

    case TargetField:
      if (corp->targets == NULL)
        corp->targets = (int *)cl_malloc(corp->size * sizeof(int));
      memcpy(corp->targets, corp->keywords, corp->size * sizeof(int));
      break;

    case NoField:
    default:
      assert(0 && "Can't be");
      break;
    }
    break;

  case TargetField:
    if (corp->targets == NULL) {
      cqpmessage(Error, "No collocates / targets defined for %s\n", corp->name);
      return 0;
    }

    switch (t_id) {
    case MatchField:
      if (corp->range == NULL) {
        cqpmessage(Error, "Internal error: match ranges not allocated. Abort.");
        return 0;
      }

      for (i = 0; i < corp->size; i++) {
        if (corp->targets[i] >= 0)
          corp->range[i].start = corp->targets[i];
        if (corp->range[i].start > corp->range[i].end)
          corp->range[i].start = corp->range[i].end;
      }
      break;

    case MatchEndField:
      if (corp->range == NULL) {
        cqpmessage(Error, "Internal error: match ranges not allocated. Abort.");
        return 0;
      }

      for (i = 0; i < corp->size; i++) {
        if (corp->targets[i] >= 0)
          corp->range[i].end   = corp->targets[i];
        if (corp->range[i].end < corp->range[i].start)
          corp->range[i].end = corp->range[i].start;
      }
      break;

    case KeywordField:
      if (corp->keywords == NULL)
        corp->keywords = (int *)cl_malloc(corp->size * sizeof(int));
      memcpy(corp->keywords, corp->targets, corp->size * sizeof(int));
      break;

    case NoField:
    default:
      assert(0 && "Can't be");
      break;
    }
    break;

  case MatchField:

    switch (t_id) {
    case MatchEndField:
      /* collapse every match onto its first token */
      for (i = 0; i < corp->size; i++)
        corp->range[i].end = corp->range[i].start;
      break;

    case KeywordField:
      if (corp->keywords == NULL)
        corp->keywords = (int *)cl_malloc(corp->size * sizeof(int));
      for (i = 0; i < corp->size; i++)
        corp->keywords[i] = corp->range[i].start;
      break;

    case TargetField:
      if (corp->targets == NULL)
        corp->targets = (int *)cl_malloc(corp->size * sizeof(int));
      for (i = 0; i < corp->size; i++)
        corp->targets[i] = corp->range[i].start;
      break;

    case NoField:
    default:
      assert(0 && "Can't be");
      break;
    }

    break;

  case MatchEndField:

    switch (t_id) {
    case MatchField:
      /* collapse every match onto its last token */
      for (i = 0; i < corp->size; i++)
        corp->range[i].start = corp->range[i].end;
      break;
      
    case KeywordField:
      if (corp->keywords == NULL)
        corp->keywords = (int *)cl_malloc(corp->size * sizeof(int));
      for (i = 0; i < corp->size; i++)
        corp->keywords[i] = corp->range[i].end;
      break;

    case TargetField:
      if (corp->targets == NULL)
        corp->targets = (int *)cl_malloc(corp->size * sizeof(int));
      for (i = 0; i < corp->size; i++)
        corp->targets[i] = corp->range[i].end;
      break;

    case NoField:
    default:
      assert(0 && "Can't be");
      break;
    }
    break;

  default:
    assert("Can't be" && 0);
    break;
  }
  
  if ((t_id == MatchField) || (t_id == MatchEndField))
    RangeSort(corp, 0);                /* re-sort corpus if match regions were modified */
  touch_corpus(corp);

  return 1;
}
Пример #20
0
Файл: cqp.c Проект: rforge/rcwb
/**
 * Initialises the CQP program (or cqpserver or cqpcl).
 *
 * This function:
 * - initialises the global variables;
 * - initialises the built-in random number generator;
 * - initialises the macro database;
 * - parses the program options;
 * - reads the initialisation file;
 * - reads the macro initialisation file;
 * - and loads the default corpus, if any.
 *
 * @param argc  The argc from main()
 * @param argv  The argv from main()
 * @return      Always 1.
 */
int
initialize_cqp(int argc, char **argv)
{
  char *home = NULL;
  char *homedrive = NULL;
  char *homepath = NULL;
  char init_file_fullname[CL_MAX_FILENAME_LENGTH];

  /* file handle for initialisation files, if any */
  FILE *cqprc;

  extern int yydebug;

  /* initialize global variables */

  exit_cqp = 0;
  cqp_file_p = 0;

  corpuslist = NULL;

  eep = -1;

  /* intialise built-in random number generator */
  cl_randomize();

  /* initialise macro database */
  init_macros();

  /* parse program options */
  parse_options(argc, argv);

  /* let's always run stdout unbuffered */
  /*  if (batchmode || rangeoutput || insecure || !isatty(fileno(stdout))) */
  if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
    perror("unbuffer stdout");

  yydebug = parser_debug;

  /* before we start looking for files, let's get the home directory, if we can,
   * so we don't have to detect it in more than one place. */
#ifndef __MINGW__
  home = (char *)getenv("HOME");
#else
  /* under Windows it is %HOMEDRIVE%%HOMEPATH% */
  if ((homepath = (char *)getenv("HOMEPATH")) != NULL && (homedrive = (char *)getenv("HOMEDRIVE")) != NULL )  {
    home = (char *)cl_malloc(256);
    sprintf(home, "%s%s", homedrive, homepath);
  }
#endif
  /* note that either way above, home is NULL if the needed env var(s) were not found. */


  /* read initialization file if specified via -I, or if we are in interactive mode */
  if (cqp_init_file ||
      (!child_process && (!batchmode || batchfd == NULL) && which_app != cqpserver)
      ) {

    /*
     * Read init file specified with -I <file>
     *   if no init file was specified, and we're not in batchmode, child mode, or cqpserver,
     *   looks for ~/.cqprc
     * Same with macro init file (-M <file> or ~/.cqpmacros), but ONLY if macros are enabled.
     */

    /*
     * allow interactive commands during processing of initialization file ???
     * (I don't think this is the case!!)
     */

    init_file_fullname[0] = '\0';

    /* read init file specified with -I , otherwise look for $HOME/.cqprc */
    if (cqp_init_file)
      sprintf(init_file_fullname, "%s", cqp_init_file);
    else if (home)
      sprintf(init_file_fullname, "%s%c%s", home, SUBDIR_SEPARATOR, CQPRC_NAME);

    if (init_file_fullname[0] != '\0') {
      if ((cqprc = fopen(init_file_fullname, "r")) != NULL) {

        reading_cqprc = 1;        /* not good for very much, really */
        if (!cqp_parse_file(cqprc, 1)) {
          fprintf(stderr, "Parse errors while reading %s, exiting.\n",
                  init_file_fullname);
          exit(1);
        }
        reading_cqprc = 0;

        /* fclose(cqprc);  was already closed by cqp_parse_file!! */
      }
      else if (cqp_init_file) {
        fprintf(stderr, "Can't read initialization file %s\n",
                init_file_fullname);
        exit(1);
      }
    }
  }

  if (!enable_macros && macro_init_file)
    cqpmessage(Warning, "Macros not enabled. Ignoring macro init file %s.", macro_init_file);

  if (enable_macros &&
      (macro_init_file ||
       (!child_process && (!batchmode || (batchfd == NULL)) && !(which_app == cqpserver))
       )
      ) {

    init_file_fullname[0] = '\0';

    /* read macro init file specified with -M , otherwise look for ~/.cqpmacros */
    if (macro_init_file)
      sprintf(init_file_fullname, "%s", macro_init_file);
    else if (home)
      sprintf(init_file_fullname, "%s%c%s", home, SUBDIR_SEPARATOR, CQPMACRORC_NAME);

    if (init_file_fullname[0] != '\0') {
      if ((cqprc = fopen(init_file_fullname, "r")) != NULL) {

        reading_cqprc = 1;        /* not good for very much, really */
        if (!cqp_parse_file(cqprc, 1)) {
          fprintf(stderr, "Parse errors while reading %s, exiting.\n",
                  init_file_fullname);
          exit(1);
        }
        reading_cqprc = 0;

        /* fclose(cqprc);  was already closed by cqp_parse_file!! */
      }
      else if (macro_init_file) {
        fprintf(stderr, "Can't read macro initialization file %s\n",
                init_file_fullname);
        exit(1);
      }
    }
  } /* ends if (!child_process || (batchmode ... ) ... ) */

  check_available_corpora(UNDEF);

  /* load the default corpus. */
  if ((default_corpus) && !set_current_corpus_name(default_corpus, 0)) {
    fprintf(stderr, "Can't set current corpus to default corpus %s, exiting.\n",
            default_corpus);
    exit(1);
  }

#ifndef __MINGW__
  if (signal(SIGPIPE, SIG_IGN) == SIG_IGN) {
    /* fprintf(stderr, "Couldn't install SIG_IGN for SIGPIPE signal\n"); */
    /* -- be silent about not being able to ignore the SIGPIPE signal, which often happens in slave mode */
    /* note that SIGPIPE does not seem to exist in signal.h under MinGW */
    signal(SIGPIPE, SIG_DFL);
  }
#endif

#ifdef __MINGW__
  /* due to how the home path was calculated, home contains a malloc'ed string */
  cl_free(home);
#endif

  return 1;
}
Пример #21
0
/**
 * Builds the list of s-attributes to print from the global printStructure
 * option string.
 *
 * The string is split in place at PRINT_STRUC_SEP; each name that is a
 * declared s-attribute of the corpus and has values is added to the list,
 * all others produce a warning. Afterwards printStructure is rewritten to
 * hold just the names in the resulting list, separated by single blanks
 * (or the empty string if there is no list).
 *
 * @param cl  The corpus from which to find the attributes.
 * @return    An attribute list containing the attributes to be printed;
 *            NULL if printStructure is unset or empty, cl is NULL, or the
 *            list fails verification.
 */
AttributeList *
ComputePrintStructures(CorpusList *cl)
{
  AttributeList *result = NULL;
  AttributeInfo *entry;
  Attribute *s_att;
  char *name, *out;

  /* nothing to do without an option string or a corpus */
  if (printStructure == NULL || printStructure[0] == '\0' || cl == NULL)
    return NULL;

  name = strtok(printStructure, PRINT_STRUC_SEP);
  if (name == NULL)
    return NULL;

  /* collect the usable attribute names */
  for ( ; name != NULL; name = strtok(NULL, PRINT_STRUC_SEP)) {

    s_att = find_attribute(cl->corpus, name, ATT_STRUC, NULL);

    if (s_att == NULL) {
      cqpmessage(Warning,
                 "Structure ``%s'' not declared for corpus ``%s''.",
                 name, cl->corpus->registry_name);
      continue;
    }

    if (!structure_has_values(s_att)) {
      cqpmessage(Warning, "Structure ``%s'' does not have any values.",
                 name);
      continue;
    }

    /* the list is only created once the first valid name turns up */
    if (result == NULL)
      result = NewAttributeList(ATT_STRUC);

    (void) AddNameToAL(result, name, 1, 0);
  }

  if (result != NULL) {
    if (!VerifyList(result, cl->corpus, 1)) {
      cqpmessage(Error,
                 "Problems while computing print structure list");
      DestroyAttributeList(&result);
      result = NULL;
    }
    else if (result->list == NULL)
      DestroyAttributeList(&result);
  }

  /* rebuild printStructure string to show only valid attributes */
  out = printStructure;
  *out = '\0';
  for (entry = (result != NULL) ? result->list : NULL;
       entry != NULL;
       entry = entry->next) {
    if (out != printStructure)
      *out++ = ' ';                /* insert blank between attributes */
    sprintf(out, "%s", entry->attribute->any.name);
    out += strlen(out);
  }

  return result;
}
Пример #22
0
/**
 * Open the stream within a Redir structure.
 *
 * Depending on rd->name, output goes to:
 *  - a pipe, if the name (after leading blanks) starts with '|' followed by a
 *    command; this is refused in insecure mode;
 *  - a file, for any other name;
 *  - the pager, if no name is given, paging is enabled and stdout is a
 *    terminal; if the configured pager cannot be started,
 *    CQP_FALLBACK_PAGER is tried and, on success, becomes the new "Pager"
 *    option; if that fails too, the "Paging" option is switched off.
 *
 * rd->stream, rd->is_pipe and rd->is_paging are set on every path.
 *
 * NOTE(review): wherever no pipe, file or pager is opened, rd->stream is left
 * NULL and the function reports failure. An older comment in the insecure
 * branch spoke of "default back to bare stdout"; whether these paths should
 * hand out stdout instead depends on how close_stream() treats that stream,
 * which is not visible here -- confirm before changing.
 *
 * @param rd       Redir structure to be opened.
 * @param charset  The charset to be used. Only has an effect if the stream
 *                 to be opened is to an output pager.
 * @return         True for success, false for failure.
 */
int
open_stream(struct Redir *rd, CorpusCharset charset)
{
  int i;

  assert(rd);

  if (rd->name) {
    /* skip leading blanks to see whether this is a pipe specification */
    i = 0;
    while (rd->name[i] == ' ')
      i++;
    
    if ((rd->name[i] == '|') &&
        (rd->name[i+1] != '\0')) {
      
      if (insecure) {
        /* set stream to NULL to force return value of 0 */
        rd->stream = NULL;
        rd->is_pipe = False;
        rd->is_paging = False;
      }
      else {
        
        /* we send the output to a pipe */
        rd->is_pipe = True;
        rd->is_paging = False;
        rd->stream = popen(rd->name+i+1, rd->mode);
      }
    }
    else {

      /* normal output to file */
      rd->is_pipe = False;
      rd->is_paging = False;
      rd->stream = open_file(rd->name, rd->mode);
    }
  }
  else { /* i.e. if rd->name is NULL */
    /* Paging only makes sense when standard output is a terminal. This used
     * to call fileno(NULL), which passes an invalid stream to fileno(). */
    if (pager && paging && isatty(fileno(stdout))) {
      if (insecure) {
        cqpmessage(Error, "Insecure mode, paging not allowed.\n");
        /* no stream is opened (see the review note in the header comment) */
        rd->stream = NULL;
        rd->is_paging = False;
        rd->is_pipe = False;
      }
      else if ((rd->stream = open_pager(pager, charset)) == NULL) {
        cqpmessage(Warning, "Could not start pager '%s', trying fallback '%s'.\n", pager, CQP_FALLBACK_PAGER);
        if ((rd->stream = open_pager(CQP_FALLBACK_PAGER, charset)) == NULL) {
          cqpmessage(Warning, "Could not start fallback pager '%s'. Paging disabled.\n", CQP_FALLBACK_PAGER);
          set_integer_option_value("Paging", 0);
          rd->is_pipe = False;
          rd->is_paging = False;
          rd->stream = NULL;
        }
        else {
          /* remember the pager that worked */
          rd->is_pipe = 1;
          rd->is_paging = True;
          set_string_option_value("Pager", cl_strdup(CQP_FALLBACK_PAGER));
        }
      }
      else {
        rd->is_pipe = 1;
        rd->is_paging = True;
      }
    }
    else {
      rd->stream = NULL;
      rd->is_paging = False;
      rd->is_pipe = False;
    }
  }
  return (rd->stream == NULL ? 0 : 1);
}