Exemplo n.º 1
0
/**
 * Implements the 'set target' operation on a query result: for every match
 * line of corp, search a context window around a base anchor for a corpus
 * position that satisfies constr, and store that position in field t_id.
 *
 * The window spans `units` context units of type attr_name ("word"/"words"
 * for token context, otherwise the name of a structural attribute) around
 * the base anchor, restricted to one side when direction is left or right.
 * `strategy` selects which satisfying position wins (leftmost, rightmost,
 * nearest to or farthest from the anchor).  Unless `inclusive` is set, the
 * positions covered by the base anchor itself are skipped.
 *
 * Lines for which no position is found get -1 when t_id is the target or
 * keyword field; when t_id is the match or matchend field, such lines keep
 * their current value and the ranges are re-sorted afterwards.
 *
 * @return 1 on success (also after a user interrupt, which only triggers a
 *         warning), 0 if the arguments are invalid or the context attribute
 *         cannot be found.
 */
int evaluate_target(CorpusList *corp,          /* the corpus */
                    FieldType t_id,            /* the field to set */
                    FieldType base,            /* where to start the search */
                    int inclusive,             /* including or excluding the base */
                    SearchStrategy strategy,   /* disambiguation rule: which item */
                    Constrainttree constr,     /* the constraint */
                    enum ctxtdir direction,    /* context direction */
                    int units,                       /* number of units */
                    char *attr_name)           /* name of unit */
{
  Attribute *attr;
  int *table;                     /* new anchor per match line, -1 = none found */
  Context context;
  int i, line, lbound, rbound;
  int excl_start, excl_end;       /* positions covered by the base anchor */
  int nr_evals;                   /* evaluations since the last interrupt check */
  int percentage, new_percentage; /* for ProgressBar */

  /* ------------------------------------------------------------ */

  assert(corp);

  /* consistency check */
  assert(t_id == TargetField || t_id == KeywordField || t_id == MatchField || t_id == MatchEndField);

  if (!constr) {
    cqpmessage(Error, "Constraint pattern missing in 'set target' command.");
    return 0;
  }

  if (corp->size <= 0) {
    cqpmessage(Error, "Corpus is empty.");
    return 0;
  }

  /*
   * check whether the base field specification is ok
   */
  switch(base) {
  case MatchField:
  case MatchEndField:
    if (corp->range == NULL) {
      cqpmessage(Error, "No ranges for start of search");
      return 0;
    }
    break;
  case TargetField:
    if (corp->targets == NULL) {
      cqpmessage(Error, "Can't start from base TARGET, none defined");
      return 0;
    }
    break;
  case KeywordField:
    if (corp->keywords == NULL) {
      cqpmessage(Error, "Can't start from base KEYWORD, none defined");
      return 0;
    }
    break;
  default:
    cqpmessage(Error, "Illegal base field (#%d) in 'set target' command.",
               base);
    return 0;
  }

  if (units <= 0) {
    cqpmessage(Error, "Invalid search space (%d units) in 'set target' command.", 
               units);
    return 0;
  }

  /* no access_corpus() check here: the grammar is expected to make sure
     that the subcorpus exists and is loaded */

  context.size = units;
  context.direction = direction;

  /* "word"/"words" selects token context, anything else is looked up as a
     structural attribute that delimits the context units */
  if ((strcasecmp(attr_name, "word") == 0) ||
      (strcasecmp(attr_name, "words") == 0)) {
    attr = find_attribute(corp->corpus, DEFAULT_ATT_NAME, ATT_POS, NULL);
    context.type = word;
    context.attrib = NULL;
  }
  else {
    attr = find_attribute(corp->corpus, attr_name, ATT_STRUC, NULL);
    context.type = structure;
    context.attrib = attr;
  }

  if (attr == NULL) {
    cqpmessage(Error, "Can't find attribute %s.%s",
               corp->mother_name, attr_name);
    return 0;
  }

  if (progress_bar) {
    progress_bar_clear_line();
    progress_bar_message(1, 1, "    preparing");
  }

  table = (int *)cl_calloc(corp->size, sizeof(int));

  /* pre-fill the whole table with -1 ("nothing found"): if the main loop is
     interrupted, the unprocessed lines must not be left at 0, which would be
     taken for a valid corpus position when the table is installed below */
  for (i = 0; i < corp->size; i++)
    table[i] = -1;

  EvaluationIsRunning = 1;
  nr_evals = 0;
  percentage = -1;

  for (line = 0; line < corp->size && EvaluationIsRunning; line++) {

    if (progress_bar) {
      new_percentage = floor(0.5 + (100.0 * line) / corp->size);
      if (new_percentage > percentage) {
        percentage = new_percentage;
        progress_bar_percentage(0, 0, percentage);
      }
    }

    /*
     * Step 1: determine the excluded region (the base anchor itself) and
     * the search space [lbound, rbound]; both bounds are set to -1 if no
     * search space can be computed for this line.
     */
    switch(base) {
    case MatchField:

      excl_start = corp->range[line].start;
      excl_end   = corp->range[line].end;

      if ((corp->range[line].start == corp->range[line].end) || inclusive) {

        if (calculate_ranges(corp,
                             corp->range[line].start, context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute boundaries for range #%d", line);
          lbound = rbound = -1;
        }
      }
      else {

        /* multi-token match, exclusive search: the left boundary is taken
           from the match start, the right boundary from the match end */
        int dummy;

        if (calculate_ranges(corp,
                             corp->range[line].start, context,
                             &lbound, &dummy) == False) {

          Rprintf( "Can't compute left search space boundary match #%d", line);
          lbound = rbound = -1;
        }
        else if (calculate_ranges(corp,
                                  corp->range[line].end, context,
                                  &dummy, &rbound) == False) {

          Rprintf( "Can't compute right search space boundary match #%d", line);
          lbound = rbound = -1;
        }
      }
      break;

    case MatchEndField:
      excl_start = excl_end = corp->range[line].end;

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->range[line].end, context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;

    case TargetField:
      excl_start = excl_end = corp->targets[line];

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->targets[line], context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;

    case KeywordField:
      excl_start = excl_end = corp->keywords[line];

      if (excl_start >= 0) {
        if (calculate_ranges(corp,
                             corp->keywords[line], context,
                             &lbound, &rbound) == False) {

          Rprintf( "Can't compute search space boundaries for match #%d", line);
          lbound = rbound = -1;
        }
      }
      else 
        lbound = rbound = -1;

      break;
    default:
      /* unreachable: base was validated above; release what we hold anyway */
      assert(0 && "Can't be");
      cl_free(table);
      EvaluationIsRunning = 0;
      return 0;
    }

    /*
     * Step 2: scan the search space according to the strategy.
     */
    if ((lbound >= 0) && (rbound >= 0)) {
      
      int dist, maxdist;

      /* a one-sided search space turns nearest/farthest into a plain
         leftmost/rightmost scan; the mapping is idempotent, so overwriting
         the strategy parameter is safe across loop iterations */
      if (direction == left) {
        rbound = excl_start;
        if (strategy == SearchNearest)
          strategy = SearchRightmost;
        else if (strategy == SearchFarthest)
          strategy = SearchLeftmost;
      }
      else if (direction == right) {
        lbound = excl_start;
        if (strategy == SearchNearest)
          strategy = SearchLeftmost;
        else if (strategy == SearchFarthest)
          strategy = SearchRightmost;
      }

      switch (strategy) {
      case SearchFarthest:

        maxdist = MAX(excl_start - lbound, rbound - excl_start);

        assert(maxdist >= 0);

        /* walk inwards from the largest distance; at equal distance the
           left-hand candidate is tested first */
        for (dist = maxdist; dist >= 0; dist--) {

          i = excl_start - dist;

          if (i >= lbound &&
              (inclusive || (i < excl_start)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          i = excl_start + dist;

          if (i <= rbound &&
              (inclusive || (i > excl_end)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          nr_evals++;
          if (nr_evals == 1000) {
            CheckForInterrupts();
            nr_evals = 0;
          }

        }
        break;

      case SearchNearest:

        maxdist = MAX(excl_start - lbound, rbound - excl_start);
        assert(maxdist >= 0);

        /* walk outwards from the anchor; at equal distance the left-hand
           candidate is tested first */
        for (dist = 0; dist <= maxdist; dist++) {

          i = excl_start - dist;

          if (i >= lbound &&
              (inclusive || (i < excl_start)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          i = excl_start + dist;

          if (i <= rbound &&
              (inclusive || (i > excl_end)))
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

          nr_evals++;
          if (nr_evals == 1000) {
            CheckForInterrupts();
            nr_evals = 0;
          }

        }
        break;

      case SearchLeftmost:
        for (i = lbound; i <= rbound; i++)
          if (inclusive || (i < excl_start) || (i > excl_end)) {
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

            nr_evals++;
            if (nr_evals == 1000) {
              CheckForInterrupts();
              nr_evals = 0;
            }
          }
        break;

      case SearchRightmost:
        for (i = rbound; i >= lbound; i--)
          if (inclusive || (i < excl_start) || (i > excl_end)) {
            if (eval_bool(constr, NULL, i)) {
              table[line] = i;
              break;
            }

            nr_evals++;
            if (nr_evals == 1000) {
              CheckForInterrupts();
              nr_evals = 0;
            }
          }
        break;
      default:
        break;
      }
    }
  }

  if (progress_bar) 
    progress_bar_message(1, 1, "  cleaning up");

  /*
   * Step 3: install the results.  Match and matchend are updated in place
   * (keeping start <= end) and the table is discarded; for target and
   * keyword the table replaces the old anchor array.
   */
  switch (t_id) {
  case MatchField:
    for (i = 0; i < corp->size; i++) {
      if (table[i] >= 0) 
        corp->range[i].start = table[i];
      if (corp->range[i].start > corp->range[i].end)
        corp->range[i].start = corp->range[i].end;
    }
    cl_free(table);
    break;

  case MatchEndField:
    for (i = 0; i < corp->size; i++) {
      if (table[i] >= 0) 
        corp->range[i].end = table[i];
      if (corp->range[i].end < corp->range[i].start)
        corp->range[i].end = corp->range[i].start;
    }
    cl_free(table);
    break;

  case TargetField:
    cl_free(corp->targets);
    corp->targets = table;
    break;

  case KeywordField:
    cl_free(corp->keywords);
    corp->keywords = table;
    break;

  default:
    assert(0 && "Can't be");
    break;
  }

  if (progress_bar)
    progress_bar_clear_line();

  if ((t_id == MatchField) || (t_id == MatchEndField))
    RangeSort(corp, 0);                /* re-sort corpus if match regions were modified */

  touch_corpus(corp);  
  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interrupted: results may be incomplete.");
    if (which_app == cqp) install_signal_handler();
  }
  EvaluationIsRunning = 0;

  return 1;
}
Exemplo n.º 2
0
/**
 * Filter a query result in place: keep only those match lines whose anchor
 * in the_field is a corpus position that satisfies constr.
 *
 * Lines whose anchor is undefined (negative, or the_field is NoField or an
 * unknown field) or fails the constraint are marked for deletion by setting
 * both ends of their range to -1; RangeSetop(cl, RReduce, ...) is then
 * called on the result.  If the evaluation is interrupted, every line
 * that has not been checked yet is marked for deletion as well, so the
 * result may be incomplete but contains only verified matches.
 *
 * cl must be a SUB or TEMP corpus (asserted).
 *
 * @return always 1.
 */
int evaluate_subset(CorpusList *cl, /* the corpus */
                    FieldType the_field,       /* the field to scan */
                    Constrainttree constr)
{
  int line, position;
  int percentage, new_percentage; /* for ProgressBar */

  assert(cl && constr);
  assert(cl->type == SUB || cl->type == TEMP);

  percentage = -1;

  EvaluationIsRunning = 1;
  for (line = 0; (line < cl->size) && EvaluationIsRunning; line++) {

    if (progress_bar) {
      new_percentage = floor(0.5 + (100.0 * line) / cl->size);
      if (new_percentage > percentage) {
        percentage = new_percentage;
        progress_bar_percentage(0, 0, percentage);
      }
    }

    /* pick the corpus position to test for this line; -1 = no anchor */
    switch (the_field) {
    
    case MatchField:
      position = cl->range[line].start;
      break;
      
    case MatchEndField:
      position = cl->range[line].end;
      break;
      
    case KeywordField:
      assert(cl->keywords);
      position = cl->keywords[line];
      break;
      
    case TargetField:
      assert(cl->targets);
      position = cl->targets[line];
      break;
      
    case NoField:
    default:
      position = -1;
      break;
    }

    /* mark the line for deletion unless its anchor satisfies the constraint */
    if (position < 0 || (!eval_bool(constr, NULL, position))) {
      cl->range[line].start = -1;
      cl->range[line].end   = -1;
    }
  }
  
  /* if interrupted, delete part of temporary query result which hasn't been filtered;
     so that the result is incomplete but at least contains only correct matches */
  while (line < cl->size) {
    cl->range[line].start = -1;
    cl->range[line].end   = -1;
    line++;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interrupted: results may be incomplete.");
    if (which_app == cqp) install_signal_handler();
  }
  EvaluationIsRunning = 0;

  if (progress_bar) 
    progress_bar_message(0, 0, "  cleaning up");

  /* process the lines marked with -1 above; return value deliberately ignored */
  (void) RangeSetop(cl, RReduce, NULL, NULL);

  return 1;
}
Exemplo n.º 3
0
/**
 * Create a combination from a standard-form Boolean expression, or
 * set/reset the region flag of an existing combination.
 *
 * Usage: comb_std [-cr] comb_name <boolean_expr>
 *
 * Options -c and -g request a plain combination (region flag 0), -r
 * requests a region (region flag 1).  When a flag option is given and no
 * expression follows comb_name, only the region flag of the existing
 * combination comb_name is updated.  Otherwise the remaining arguments
 * are tokenized into operands, operators and parentheses, checked,
 * evaluated into a tree and stored as the combination comb_name.
 *
 * Messages are written to gedp->ged_result_str.
 *
 * @return GED_HELP when called with the command name only, GED_ERROR on
 *         too few arguments or on tokenizing/syntax failures, GED_OK
 *         otherwise (including the usage message printed for an unknown
 *         option or a missing combination name).  The GED_* helper
 *         macros used below are given a failure value as well and
 *         presumably return it from this function on failure.
 */
int
ged_comb_std(struct ged *gedp, int argc, const char *argv[])
{
    const char *cmd_name;
    char *comb_name;
    int ch;
    int region_flag = -1;	/* -1: no flag option given */
    struct directory *dp = RT_DIR_NULL;
    struct rt_db_internal intern;
    struct rt_comb_internal *comb = NULL;
    struct tokens tok_hd;
    short last_tok;
    int i;
    union tree *final_tree;
    static const char *usage = "[-cr] comb_name <boolean_expr>";

    GED_CHECK_DATABASE_OPEN(gedp, GED_ERROR);
    GED_CHECK_READ_ONLY(gedp, GED_ERROR);
    GED_CHECK_ARGC_GT_0(gedp, argc, GED_ERROR);

    /* remember the command name now: argv is advanced past the options
     * below, after which argv[0] no longer refers to it
     */
    cmd_name = argv[0];

    /* initialize result */
    bu_vls_trunc(gedp->ged_result_str, 0);

    /* must be wanting help */
    if (argc == 1) {
	bu_vls_printf(gedp->ged_result_str, "Usage: %s %s", cmd_name, usage);
	return GED_HELP;
    }

    if (argc < 3) {
	bu_vls_printf(gedp->ged_result_str, "Usage: %s %s", cmd_name, usage);
	return GED_ERROR;
    }

    /* Parse options */
    bu_optind = 1;	/* re-init bu_getopt() */
    while ((ch = bu_getopt(argc, (char * const *)argv, "cgr?")) != -1) {
	switch (ch) {
	    case 'c':
	    case 'g':
		region_flag = 0;
		break;
	    case 'r':
		region_flag = 1;
		break;
		/* XXX How about -p and -v for FASTGEN? */
	    case '?':
	    default:
		bu_vls_printf(gedp->ged_result_str, "Usage: %s %s", cmd_name, usage);
		return GED_OK;
	}
    }

    /* From here on argc counts the expression words that follow
     * comb_name, and argv starts at comb_name.
     */
    argc -= (bu_optind + 1);
    argv += bu_optind;

    /* A negative count means the options consumed every argument, so
     * there is no combination name.  Check this before touching argv:
     * reading the name, or argv[0] for the usage message, would access
     * the argument vector at or beyond its end.
     */
    if (argc < 0) {
	bu_vls_printf(gedp->ged_result_str, "Usage: %s %s", cmd_name, usage);
	return GED_OK;
    }

    comb_name = (char *)*argv++;

    if ((region_flag != -1) && (argc == 0)) {
	/*
	 * Set/Reset the REGION flag of an existing combination
	 */
	/* NOTE(review): the flags are combined with '&' rather than '|';
	 * confirm against the GED_* definitions that this yields the
	 * intended value on lookup failure.
	 */
	GED_DB_LOOKUP(gedp, dp, comb_name, LOOKUP_NOISY, GED_ERROR & GED_QUIET);

	if (!(dp->d_flags & RT_DIR_COMB)) {
	    bu_vls_printf(gedp->ged_result_str, "%s is not a combination\n", comb_name);
	    return GED_ERROR;
	}

	GED_DB_GET_INTERNAL(gedp, &intern, dp, (fastf_t *)NULL, &rt_uniresource, GED_ERROR);
	comb = (struct rt_comb_internal *)intern.idb_ptr;
	RT_CK_COMB(comb);

	if (region_flag) {
	    if (!comb->region_flag) {
		/* becoming a region: assign values from the defaults */
		comb->region_id = gedp->ged_wdbp->wdb_item_default++;
		comb->aircode = gedp->ged_wdbp->wdb_air_default;
		comb->GIFTmater = gedp->ged_wdbp->wdb_mat_default;
		comb->los = gedp->ged_wdbp->wdb_los_default;
	    }
	    comb->region_flag = 1;
	} else
	    comb->region_flag = 0;

	GED_DB_PUT_INTERNAL(gedp, dp, &intern, &rt_uniresource, GED_ERROR);

	return GED_OK;
    }
    /*
     * At this point, we know we have a Boolean expression.
     * If the combination already existed and region_flag is -1,
     * then leave its region_flag alone.
     * If the combination didn't exist yet,
     * then pretend region_flag was 0.
     * Otherwise, make sure to set its c_flags according to region_flag.
     */

    GED_CHECK_EXISTS(gedp, comb_name, LOOKUP_QUIET, GED_ERROR);
    dp = RT_DIR_NULL;

    /* parse Boolean expression */
    BU_LIST_INIT(&tok_hd.l);
    tok_hd.type = TOK_NULL;

    /* starting as if a '(' had just been read makes the first token an
     * operand
     */
    last_tok = TOK_LPAREN;
    for (i = 0; i < argc; i++) {
	char *ptr;

	ptr = (char *)argv[i];
	while (*ptr) {
	    /* consume any run of parentheses */
	    while (*ptr == '(' || *ptr == ')') {
		switch (*ptr) {
		    case '(':
			append_lparen(&tok_hd.l);
			last_tok = TOK_LPAREN;
			break;
		    case ')':
			append_rparen(&tok_hd.l);
			last_tok = TOK_RPAREN;
			break;
		}
		ptr++;
	    }

	    /* end of this word: the loop condition ends the scan */
	    if (*ptr == '\0')
		continue;

	    if (last_tok == TOK_RPAREN) {
		/* next token MUST be an operator */
		if (add_operator(gedp, &tok_hd.l, *ptr, &last_tok) == GED_ERROR) {
		    free_tokens(&tok_hd.l);
		    return GED_ERROR;
		}
		ptr++;
	    } else if (last_tok == TOK_LPAREN) {
		/* next token MUST be an operand */
		int name_len;

		name_len = add_operand(gedp, &tok_hd.l, ptr);
		if (name_len < 1) {
		    free_tokens(&tok_hd.l);
		    return GED_ERROR;
		}
		last_tok = TOK_TREE;
		ptr += name_len;
	    } else if (last_tok == TOK_TREE) {
		/* must be an operator */
		if (add_operator(gedp, &tok_hd.l, *ptr, &last_tok) == GED_ERROR) {
		    free_tokens(&tok_hd.l);
		    return GED_ERROR;
		}
		ptr++;
	    } else if (last_tok == TOK_UNION ||
		       last_tok == TOK_INTER ||
		       last_tok == TOK_SUBTR) {
		/* must be an operand */
		int name_len;

		name_len = add_operand(gedp, &tok_hd.l, ptr);
		if (name_len < 1) {
		    free_tokens(&tok_hd.l);
		    return GED_ERROR;
		}
		last_tok = TOK_TREE;
		ptr += name_len;
	    }
	}
    }

    if (check_syntax(gedp, &tok_hd.l, comb_name, dp)) {
	free_tokens(&tok_hd.l);
	return GED_ERROR;
    }

    final_tree = eval_bool(&tok_hd.l);

    {
	int flags;

	flags = RT_DIR_COMB;
	BU_ALLOC(comb, struct rt_comb_internal);
	RT_COMB_INTERNAL_INIT(comb);

	comb->tree = final_tree;

	comb->region_id = -1;
	/* no flag option given: create a plain (non-region) combination */
	if (region_flag == (-1))
	    comb->region_flag = 0;
	else
	    comb->region_flag = region_flag;

	if (comb->region_flag) {
	    /* new region: take its attributes from the current defaults
	     * and report them
	     */
	    comb->region_flag = 1;
	    comb->region_id = gedp->ged_wdbp->wdb_item_default++;
	    comb->aircode = gedp->ged_wdbp->wdb_air_default;
	    comb->los = gedp->ged_wdbp->wdb_los_default;
	    comb->GIFTmater = gedp->ged_wdbp->wdb_mat_default;

	    bu_vls_printf(gedp->ged_result_str, "Creating region with attrs: region_id=%d, ", comb->region_id);
	    if (comb->aircode)
		bu_vls_printf(gedp->ged_result_str, "air=%d, ", comb->aircode);
	    bu_vls_printf(gedp->ged_result_str, "los=%d, material_id=%d\n",
			  comb->los,
			  comb->GIFTmater);

	    flags |= RT_DIR_REGION;
	}

	/* wrap the combination in a database internal and store it under
	 * a newly added directory entry
	 */
	RT_DB_INTERNAL_INIT(&intern);
	intern.idb_major_type = DB5_MAJORTYPE_BRLCAD;
	intern.idb_type = ID_COMBINATION;
	intern.idb_meth = &OBJ[ID_COMBINATION];
	intern.idb_ptr = (void *)comb;

	GED_DB_DIRADD(gedp, dp, comb_name, RT_DIR_PHONY_ADDR, 0, flags, (void *)&intern.idb_type, GED_ERROR);
	GED_DB_PUT_INTERNAL(gedp, dp, &intern, &rt_uniresource, GED_ERROR);
    }

    return GED_OK;
}