Esempio n. 1
0
File: groups.c Progetto: cran/rcqp
/**
 * Apply a frequency cutoff to a buffer of counted pairs and sort what is left.
 *
 * Entries whose freq is at least cutoff_f are copied (fields s, t and freq)
 * towards the front of the buffer, preserving their relative order; the
 * retained prefix is then sorted with compare_by_freq.  Slots at or beyond
 * the returned count are not touched by the compaction.
 *
 * @param buffer    array of entries, modified in place
 * @param bufsize   number of entries in buffer
 * @param cutoff_f  minimum frequency an entry needs in order to be kept
 * @return          number of entries kept (length of the sorted prefix)
 */
int
sum_freqs(ID_Count_Mapping *buffer, int bufsize, int cutoff_f)
{
  int src;
  int kept = 0;   /* next free slot at the front of the buffer */

  if (progress_bar)
    progress_bar_message(2, 2, " cutoff freq.");

  for (src = 0; src < bufsize; src++) {
    if (buffer[src].freq < cutoff_f)
      continue;                         /* below the cutoff: drop it */

    /* kept <= src always holds, so this never clobbers an unread entry */
    buffer[kept].s    = buffer[src].s;
    buffer[kept].t    = buffer[src].t;
    buffer[kept].freq = buffer[src].freq;
    kept++;
  }

  if (progress_bar)
    progress_bar_message(2, 2, " sorting rslt");

  qsort(buffer, kept, sizeof(ID_Count_Mapping), compare_by_freq);

  if (progress_bar)
    progress_bar_percentage(2, 2, 100); /* so total percentage runs up to 100% */

  return kept;
}
Esempio n. 2
0
/**
 * Filter a query result in place, keeping only the matches whose anchor
 * position satisfies a constraint.
 *
 * For each match the corpus position of the requested anchor field is looked
 * up and the constraint is evaluated there with eval_bool().  Matches whose
 * anchor is undefined (negative) or fails the constraint have both range
 * bounds set to -1; RangeSetop(cl, RReduce, ...) is called at the end.
 *
 * The loop stops early once EvaluationIsRunning is cleared; every match that
 * was not examined is then marked with -1 as well, and a warning is issued.
 *
 * @param cl         query result to filter; must be of type SUB or TEMP
 * @param the_field  anchor whose corpus position is tested
 * @param constr     constraint evaluated at that position
 * @return           always 1
 */
int evaluate_subset(CorpusList *cl, /* the corpus */
                    FieldType the_field,       /* the field to scan */
                    Constrainttree constr)
{
  int row;             /* index of the match currently examined */
  int cpos;            /* corpus position of its anchor, or -1 */
  int shown_pct = -1;  /* last percentage sent to the progress bar */
  int pct;

  assert(cl && constr);
  assert(cl->type == SUB || cl->type == TEMP);

  EvaluationIsRunning = 1;

  row = 0;
  while ((row < cl->size) && EvaluationIsRunning) {

    if (progress_bar) {
      pct = floor(0.5 + (100.0 * row) / cl->size);
      if (pct > shown_pct) {
        shown_pct = pct;
        progress_bar_percentage(0, 0, shown_pct);
      }
    }

    /* find the corpus position of the anchor to be tested */
    if (the_field == MatchField) {
      cpos = cl->range[row].start;
    }
    else if (the_field == MatchEndField) {
      cpos = cl->range[row].end;
    }
    else if (the_field == KeywordField) {
      assert(cl->keywords);
      cpos = cl->keywords[row];
    }
    else if (the_field == TargetField) {
      assert(cl->targets);
      cpos = cl->targets[row];
    }
    else {
      /* NoField or anything unexpected: nothing to test, so the match is dropped */
      cpos = -1;
    }

    /* the constraint is only evaluated for a defined anchor position */
    if (!(cpos >= 0 && eval_bool(constr, NULL, cpos))) {
      cl->range[row].start = -1;
      cl->range[row].end   = -1;
    }

    row++;
  }

  /* if interrupted, delete part of temporary query result which hasn't been filtered;
     so that the result is incomplete but at least contains only correct matches */
  for ( ; row < cl->size; row++) {
    cl->range[row].start = -1;
    cl->range[row].end   = -1;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interruted: results may be incomplete.");
    if (which_app == cqp)
      install_signal_handler();
  }
  EvaluationIsRunning = 0;

  if (progress_bar)
    progress_bar_message(0, 0, "  cleaning up");

  (void) RangeSetop(cl, RReduce, NULL, NULL);

  return 1;
}
Esempio n. 3
0
/**
 * Set one anchor field of every match in a query result by searching a
 * context window for a corpus position that satisfies a constraint.
 *
 * For each match, a search space of `units` context units (tokens if
 * attr_name is "word"/"words", otherwise regions of the structural attribute
 * attr_name) is computed around the base anchor with calculate_ranges().
 * The constraint is evaluated with eval_bool() at positions inside that
 * space, in the order given by the search strategy, and the first hit is
 * recorded.  Results are written back according to t_id:
 *   - MatchField / MatchEndField: the range start / end is replaced where a
 *     hit was found (clamped so that start <= end), and the ranges are
 *     re-sorted with RangeSort();
 *   - TargetField / KeywordField: the previous targets / keywords array is
 *     freed and replaced by the new table, with -1 for matches without a hit.
 *
 * If evaluation is interrupted (EvaluationIsRunning cleared), matches that
 * were not processed are treated as "no hit" and a warning is issued.
 *
 * @return 1 on completion (including an interrupted run), 0 if an argument
 *         check failed (an error message has been issued in that case).
 */
int evaluate_target(CorpusList *corp,          /* the corpus */
                    FieldType t_id,            /* the field to set */
                    FieldType base,            /* where to start the search */
                    int inclusive,             /* including or excluding the base */
                    SearchStrategy strategy,   /* disambiguation rule: which item */
                    Constrainttree constr,     /* the constraint */
                    enum ctxtdir direction,    /* context direction */
                    int units,                 /* number of units */
                    char *attr_name)           /* name of unit */
{
  Attribute *attr;
  int *table;                     /* per-match search result: corpus position or -1 */
  Context context;
  int i, line, lbound, rbound;
  int excl_start, excl_end;       /* base region, skipped unless `inclusive` */
  int nr_evals;
  int percentage, new_percentage; /* for ProgressBar */

  /* ------------------------------------------------------------ */

  assert(corp);

  /* consistency check */
  assert(t_id == TargetField || t_id == KeywordField || t_id == MatchField || t_id == MatchEndField);

  if (!constr) {
    cqpmessage(Error, "Constraing pattern missing in 'set target' command.");
    return 0;
  }

  if (corp->size <= 0) {
    cqpmessage(Error, "Corpus is empty.");
    return 0;
  }

  /*
   * check whether the base field specification is ok
   */
  switch(base) {
  case MatchField:
  case MatchEndField:
    if (corp->range == NULL) {
      cqpmessage(Error, "No ranges for start of search");
      return 0;
    }
    break;
  case TargetField:
    if (corp->targets == NULL) {
      cqpmessage(Error, "Can't start from base TARGET, none defined");
      return 0;
    }
    break;
  case KeywordField:
    if (corp->keywords == NULL) {
      cqpmessage(Error, "Can't start from base KEYWORD, none defined");
      return 0;
    }
    break;
  default:
    cqpmessage(Error, "Illegal base field (#%d) in 'set target' command.",
               base);
    return 0;
  }

  if (units <= 0) {
    cqpmessage(Error, "Invalid search space (%d units) in 'set target' command.", 
               units);
    return 0;
  }

  /* describe the search space: `units` tokens, or `units` regions of a structural attribute */
  context.size = units;
  context.direction = direction;

  if ((strcasecmp(attr_name, "word") == 0) ||
      (strcasecmp(attr_name, "words") == 0)) {
    attr = find_attribute(corp->corpus, DEFAULT_ATT_NAME, ATT_POS, NULL);
    context.type = word;
    context.attrib = NULL;
  }
  else {
    attr = find_attribute(corp->corpus, attr_name, ATT_STRUC, NULL);
    context.type = structure;
    context.attrib = attr;
  }

  if (attr == NULL) {
    cqpmessage(Error, "Can't find attribute %s.%s",
               corp->mother_name, attr_name);
    return 0;
  }

  if (progress_bar) {
    progress_bar_clear_line();
    progress_bar_message(1, 1, "    preparing");
  }

  table = (int *)cl_calloc(corp->size, sizeof(int));

  /* Mark every match as "no hit" before searching.  The table comes back
   * zero-filled, and 0 is a valid corpus position: if the main loop is
   * interrupted, rows it never reached must not be mistaken for hits at
   * position 0 when the table is written back below. */
  for (i = 0; i < corp->size; i++)
    table[i] = -1;

  EvaluationIsRunning = 1;
  nr_evals = 0;
  percentage = -1;

  for (line = 0; line < corp->size && EvaluationIsRunning; line++) {

    if (progress_bar) {
      new_percentage = floor(0.5 + (100.0 * line) / corp->size);
      if (new_percentage > percentage) {
        percentage = new_percentage;
        progress_bar_percentage(0, 0, percentage);
      }
    }

    /* step 1: determine the base region and the search space [lbound, rbound] */
    switch(base) {
    case MatchField:

      excl_start = corp->range[line].start;
      excl_end   = corp->range[line].end;

      if ((corp->range[line].start == corp->range[line].end) || inclusive) {

        if (calculate_ranges(corp,
                             corp->range[line].start, context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute boundaries for range #%d", line);
          lbound = rbound = -1;
        }
      }
      else {

        /* multi-token match, base excluded: left boundary is computed from
           the match start, right boundary from the match end */
        int dummy;

        if (calculate_ranges(corp,
                             corp->range[line].start, context,
                             &lbound, &dummy) == False) {

          Rprintf( "Can't compute left search space boundary match #%d", line);
          lbound = rbound = -1;
        }
        else if (calculate_ranges(corp,
                                  corp->range[line].end, context,
                                  &dummy, &rbound) == False) {

          Rprintf( "Can't compute right search space boundary match #%d", line);
          lbound = rbound = -1;
        }
      }
      break;

    case MatchEndField:
      excl_start = excl_end = corp->range[line].end;

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->range[line].end, context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;

    case TargetField:
      excl_start = excl_end = corp->targets[line];

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->targets[line], context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;

    case KeywordField:
      excl_start = excl_end = corp->keywords[line];

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->keywords[line], context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;
    default:
      /* base was validated above, so this is unreachable; if assertions are
         compiled out, at least do not leak the table or leave the run flag set */
      assert(0 && "Can't be");
      cl_free(table);
      EvaluationIsRunning = 0;
      return 0;
    }

    /* step 2: search the space for the first position satisfying the constraint */
    if ((lbound >= 0) && (rbound >= 0)) {
      
      int dist, maxdist;

      /* A one-sided context is cut off at the base position, and
         nearest/farthest are mapped onto the equivalent leftmost/rightmost
         scan.  The mapping is idempotent, so overwriting `strategy` is
         harmless for the following iterations. */
      if (direction == left) {
        rbound = excl_start;
        if (strategy == SearchNearest)
          strategy = SearchRightmost;
        else if (strategy == SearchFarthest)
          strategy = SearchLeftmost;
      }
      else if (direction == right) {
        lbound = excl_start;
        if (strategy == SearchNearest)
          strategy = SearchLeftmost;
        else if (strategy == SearchFarthest)
          strategy = SearchRightmost;
      }

      switch (strategy) {
      case SearchFarthest:

        maxdist = MAX(excl_start - lbound, rbound - excl_start);

        assert(maxdist >= 0);

        /* walk inwards from the largest distance; left side is tried first */
        for (dist = maxdist; dist >= 0; dist--) {

          i = excl_start - dist;

          if (i >= lbound &&
              (inclusive || (i < excl_start)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          i = excl_start + dist;

          if (i <= rbound &&
              (inclusive || (i > excl_end)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          nr_evals++;
          if (nr_evals == 1000) {
            CheckForInterrupts();
            nr_evals = 0;
          }

        }
        break;

      case SearchNearest:

        maxdist = MAX(excl_start - lbound, rbound - excl_start);
        assert(maxdist >= 0);

        /* walk outwards from the base; left side is tried first */
        for (dist = 0; dist <= maxdist; dist++) {

          i = excl_start - dist;

          if (i >= lbound &&
              (inclusive || (i < excl_start)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          i = excl_start + dist;

          if (i <= rbound &&
              (inclusive || (i > excl_end)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          nr_evals++;
          if (nr_evals == 1000) {
            CheckForInterrupts();
            nr_evals = 0;
          }

        }
        break;

      case SearchLeftmost:
        for (i = lbound; i <= rbound; i++)
          if (inclusive || (i < excl_start) || (i > excl_end)) {
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

            nr_evals++;
            if (nr_evals == 1000) {
              CheckForInterrupts();
              nr_evals = 0;
            }
          }
        break;

      case SearchRightmost:
        for (i = rbound; i >= lbound; i--)
          if (inclusive || (i < excl_start) || (i > excl_end)) {
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

            nr_evals++;
            if (nr_evals == 1000) {
              CheckForInterrupts();
              nr_evals = 0;
            }
          }
        break;
      default:
        break;
      }
    }
  }

  if (progress_bar) 
    progress_bar_message(1, 1, "  cleaning up");

  /* step 3: write the results back into the requested anchor field */
  switch (t_id) {
  case MatchField:
    for (i = 0; i < corp->size; i++) {
      if (table[i] >= 0) 
        corp->range[i].start = table[i];
      /* keep the range well-formed: start must not lie behind end */
      if (corp->range[i].start > corp->range[i].end)
        corp->range[i].start = corp->range[i].end;
    }
    cl_free(table);
    break;

  case MatchEndField:
    for (i = 0; i < corp->size; i++) {
      if (table[i] >= 0) 
        corp->range[i].end = table[i];
      /* keep the range well-formed: end must not lie before start */
      if (corp->range[i].end < corp->range[i].start)
        corp->range[i].end = corp->range[i].start;
    }
    cl_free(table);
    break;

  case TargetField:
    /* the corpus takes ownership of the table */
    cl_free(corp->targets);
    corp->targets = table;
    break;

  case KeywordField:
    /* the corpus takes ownership of the table */
    cl_free(corp->keywords);
    corp->keywords = table;
    break;

  default:
    /* excluded by the assertion at the top; do not leak if it is compiled out */
    assert(0 && "Can't be");
    cl_free(table);
    break;
  }

  if (progress_bar)
    progress_bar_clear_line();

  if ((t_id == MatchField) || (t_id == MatchEndField))
    RangeSort(corp, 0);                /* re-sort corpus if match regions were modified */

  touch_corpus(corp);  
  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interruted: results may be incomplete.");
    if (which_app == cqp) install_signal_handler();
  }
  EvaluationIsRunning = 0;

  return 1;
}