/**
 * Apply a frequency cutoff to an array of count cells and sort what is left.
 *
 * Entries whose freq is at least cutoff_f are copied (fields s, t and freq)
 * towards the front of buffer, keeping their relative order; the kept prefix
 * is then sorted with compare_by_freq. Entries behind the returned length
 * are left in an unspecified state.
 *
 * @param buffer    array of cells, modified in place
 * @param bufsize   number of cells in buffer
 * @param cutoff_f  minimum frequency a cell needs in order to be kept
 * @return          number of cells kept, i.e. length of the sorted prefix
 */
int sum_freqs(ID_Count_Mapping *buffer, int bufsize, int cutoff_f)
{
  int n_kept = 0;   /* next free slot in the compacted prefix */
  int src;

  if (progress_bar)
    progress_bar_message(2, 2, " cutoff freq.");

  for (src = 0; src < bufsize; src++) {
    if (buffer[src].freq < cutoff_f)
      continue;                         /* below threshold: drop this cell */

    buffer[n_kept].s    = buffer[src].s;
    buffer[n_kept].t    = buffer[src].t;
    buffer[n_kept].freq = buffer[src].freq;
    n_kept++;
  }

  if (progress_bar)
    progress_bar_message(2, 2, " sorting rslt");

  qsort(buffer, n_kept, sizeof(ID_Count_Mapping), compare_by_freq);

  /* so total percentage runs up to 100% */
  if (progress_bar)
    progress_bar_percentage(2, 2, 100);

  return n_kept;
}
/**
 * Filter a query result in place, keeping only matches whose anchor token
 * satisfies a constraint.
 *
 * For every match the corpus position of the requested anchor field is
 * looked up and passed to eval_bool(). If there is no valid anchor
 * (position < 0) or the constraint is not satisfied, the match is marked
 * as deleted by setting both range boundaries to -1. Finally
 * RangeSetop(cl, RReduce, ...) is invoked (presumably to drop the marked
 * ranges -- see its definition).
 *
 * The scan stops early when EvaluationIsRunning is cleared (user
 * interrupt); all matches not yet examined are then marked as deleted as
 * well, so the result is incomplete but contains only verified matches.
 *
 * @param cl         the query result; must be of type SUB or TEMP (asserted)
 * @param the_field  the anchor to test: MatchField, MatchEndField,
 *                   KeywordField or TargetField; any other value yields
 *                   position -1, i.e. every match is discarded
 * @param constr     the constraint to evaluate; must not be NULL (asserted)
 * @return           always 1
 */
int evaluate_subset(CorpusList *cl,          /* the corpus */
                    FieldType the_field,     /* the field to scan */
                    Constrainttree constr)
{
  int line, position;
  int percentage, new_percentage; /* for ProgressBar */

  assert(cl && constr);
  assert(cl->type == SUB || cl->type == TEMP);

  percentage = -1;

  EvaluationIsRunning = 1;

  for (line = 0; (line < cl->size) && EvaluationIsRunning; line++) {

    if (progress_bar) {
      new_percentage = floor(0.5 + (100.0 * line) / cl->size);
      if (new_percentage > percentage) {
        percentage = new_percentage;
        progress_bar_percentage(0, 0, percentage);
      }
    }

    /* determine the corpus position of the anchor to be tested */
    switch (the_field) {
    case MatchField:
      position = cl->range[line].start;
      break;

    case MatchEndField:
      position = cl->range[line].end;
      break;

    case KeywordField:
      assert(cl->keywords);
      position = cl->keywords[line];
      break;

    case TargetField:
      assert(cl->targets);
      position = cl->targets[line];
      break;

    case NoField:
    default:
      position = -1;
      break;
    }

    /* no anchor, or constraint not satisfied: mark this match as deleted */
    if (position < 0 || (!eval_bool(constr, NULL, position))) {
      cl->range[line].start = -1;
      cl->range[line].end   = -1;
    }
  }

  /* if interrupted, delete part of temporary query result which hasn't been
     filtered; so that the result is incomplete but at least contains only
     correct matches */
  while (line < cl->size) {
    cl->range[line].start = -1;
    cl->range[line].end   = -1;
    line++;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Evaluation interrupted: results may be incomplete.");
    if (which_app == cqp)
      install_signal_handler();
  }
  EvaluationIsRunning = 0;

  if (progress_bar)
    progress_bar_message(0, 0, "  cleaning up");

  (void) RangeSetop(cl, RReduce, NULL, NULL);

  return 1;
}
int evaluate_target(CorpusList *corp, /* the corpus */ FieldType t_id, /* the field to set */ FieldType base, /* where to start the search */ int inclusive, /* including or excluding the base */ SearchStrategy strategy, /* disambiguation rule: which item */ Constrainttree constr, /* the constraint */ enum ctxtdir direction, /* context direction */ int units, /* number of units */ char *attr_name) /* name of unit */ { Attribute *attr; int *table; Context context; int i, line, lbound, rbound; int excl_start, excl_end; int nr_evals; int percentage, new_percentage; /* for ProgressBar */ /* ------------------------------------------------------------ */ assert(corp); /* consistency check */ assert(t_id == TargetField || t_id == KeywordField || t_id == MatchField || t_id == MatchEndField); if (!constr) { cqpmessage(Error, "Constraing pattern missing in 'set target' command."); return 0; } if (corp->size <= 0) { cqpmessage(Error, "Corpus is empty."); return 0; } /* * check whether the base field specification is ok */ switch(base) { case MatchField: case MatchEndField: if (corp->range == NULL) { cqpmessage(Error, "No ranges for start of search"); return 0; } break; case TargetField: if (corp->targets == NULL) { cqpmessage(Error, "Can't start from base TARGET, none defined"); return 0; } break; case KeywordField: if (corp->keywords == NULL) { cqpmessage(Error, "Can't start from base KEYWORD, none defined"); return 0; } break; default: cqpmessage(Error, "Illegal base field (#%d) in 'set target' command.", base); return 0; } if (units <= 0) { cqpmessage(Error, "Invalid search space (%d units) in 'set target' command.", units); return 0; } /* THIS SHOULD BE UNNECESSARY, BECAUSE THE GRAMMAR MAKES SURE THE SUBCORPUS EXISTS & IS LOADED */ /* if (!access_corpus(corp)) { */ /* cqpmessage(Error, "Can't access named query %s.", corp->name); */ /* return 0; */ /* } */ context.size = units; context.direction = direction; if ((strcasecmp(attr_name, "word") == 0) || 
(strcasecmp(attr_name, "words") == 0)) { attr = find_attribute(corp->corpus, DEFAULT_ATT_NAME, ATT_POS, NULL); context.type = word; context.attrib = NULL; } else { attr = find_attribute(corp->corpus, attr_name, ATT_STRUC, NULL); context.type = structure; context.attrib = attr; } if (attr == NULL) { cqpmessage(Error, "Can't find attribute %s.%s", corp->mother_name, attr_name); return 0; } if (progress_bar) { progress_bar_clear_line(); progress_bar_message(1, 1, " preparing"); } table = (int *)cl_calloc(corp->size, sizeof(int)); EvaluationIsRunning = 1; nr_evals = 0; percentage = -1; for (line = 0; line < corp->size && EvaluationIsRunning; line++) { if (progress_bar) { new_percentage = floor(0.5 + (100.0 * line) / corp->size); if (new_percentage > percentage) { percentage = new_percentage; progress_bar_percentage(0, 0, percentage); } } table[line] = -1; switch(base) { case MatchField: excl_start = corp->range[line].start; excl_end = corp->range[line].end; if ((corp->range[line].start == corp->range[line].end) || inclusive) { if (calculate_ranges(corp, corp->range[line].start, context, &lbound, &rbound) == False) { Rprintf( "Can't compute boundaries for range #%d", line); lbound = rbound = -1; } } else { int dummy; if (calculate_ranges(corp, corp->range[line].start, context, &lbound, &dummy) == False) { Rprintf( "Can't compute left search space boundary match #%d", line); lbound = rbound = -1; } else if (calculate_ranges(corp, corp->range[line].end, context, &dummy, &rbound) == False) { Rprintf( "Can't compute right search space boundary match #%d", line); lbound = rbound = -1; } } break; case MatchEndField: excl_start = excl_end = corp->range[line].end; if (excl_start >= 0) { if (calculate_ranges(corp, corp->range[line].end, context, &lbound, &rbound) == False) { Rprintf( "Can't compute search space boundaries for match #%d", line); lbound = rbound = -1; } } else lbound = rbound = -1; break; case TargetField: excl_start = excl_end = corp->targets[line]; if 
(excl_start >= 0) { if (calculate_ranges(corp, corp->targets[line], context, &lbound, &rbound) == False) { Rprintf( "Can't compute search space boundaries for match #%d", line); lbound = rbound = -1; } } else lbound = rbound = -1; break; case KeywordField: excl_start = excl_end = corp->keywords[line]; if (excl_start >= 0) { if (calculate_ranges(corp, corp->keywords[line], context, &lbound, &rbound) == False) { Rprintf( "Can't compute search space boundaries for match #%d", line); lbound = rbound = -1; } } else lbound = rbound = -1; break; default: assert(0 && "Can't be"); return 0; } if ((lbound >= 0) && (rbound >= 0)) { int dist, maxdist; if (direction == left) { rbound = excl_start; if (strategy == SearchNearest) strategy = SearchRightmost; else if (strategy == SearchFarthest) strategy = SearchLeftmost; } else if (direction == right) { lbound = excl_start; if (strategy == SearchNearest) strategy = SearchLeftmost; else if (strategy == SearchFarthest) strategy = SearchRightmost; } switch (strategy) { case SearchFarthest: maxdist = MAX(excl_start - lbound, rbound - excl_start); assert(maxdist >= 0); for (dist = maxdist; dist >= 0; dist--) { i = excl_start - dist; if (i >= lbound && (inclusive || (i < excl_start))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } i = excl_start + dist; if (i <= rbound && (inclusive || (i > excl_end))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } break; case SearchNearest: maxdist = MAX(excl_start - lbound, rbound - excl_start); assert(maxdist >= 0); for (dist = 0; dist <= maxdist; dist++) { i = excl_start - dist; if (i >= lbound && (inclusive || (i < excl_start))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } i = excl_start + dist; if (i <= rbound && (inclusive || (i > excl_end))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } 
break; case SearchLeftmost: for (i = lbound; i <= rbound; i++) if (inclusive || (i < excl_start) || (i > excl_end)) { if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } break; case SearchRightmost: for (i = rbound; i >= lbound; i--) if (inclusive || (i < excl_start) || (i > excl_end)) { if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } break; default: break; } } } if (progress_bar) progress_bar_message(1, 1, " cleaning up"); switch (t_id) { case MatchField: for (i = 0; i < corp->size; i++) { if (table[i] >= 0) corp->range[i].start = table[i]; if (corp->range[i].start > corp->range[i].end) corp->range[i].start = corp->range[i].end; } cl_free(table); break; case MatchEndField: for (i = 0; i < corp->size; i++) { if (table[i] >= 0) corp->range[i].end = table[i]; if (corp->range[i].end < corp->range[i].start) corp->range[i].end = corp->range[i].start; } cl_free(table); break; case TargetField: cl_free(corp->targets); corp->targets = table; break; case KeywordField: cl_free(corp->keywords); corp->keywords = table; break; default: assert(0 && "Can't be"); break; } if (progress_bar) progress_bar_clear_line(); if ((t_id == MatchField) || (t_id == MatchEndField)) RangeSort(corp, 0); /* re-sort corpus if match regions were modified */ touch_corpus(corp); if (!EvaluationIsRunning) { cqpmessage(Warning, "Evaluation interruted: results may be incomplete."); if (which_app == cqp) install_signal_handler(); } EvaluationIsRunning = 0; return 1; }
/**
 * Build the frequency table of (source ID, target ID) pairs for a group.
 *
 * For every match of group->my_corpus the two IDs are obtained with
 * get_group_id() and the matching cell in group->count_cells is looked up
 * (or inserted) via binsert_g() with compare_st_cells; its frequency is
 * then incremented. After a complete pass the cells are filtered and
 * sorted by sum_freqs() using group->cutoff_frequency, and the cell array
 * is shrunk to the number of cells that remain.
 *
 * The pass is abandoned when EvaluationIsRunning is cleared (user abort);
 * in that case a warning is issued and the group is released with
 * free_group().
 *
 * @param group  the group to compute; group->my_corpus must be set
 * @return       the group on success; after a user abort, whatever
 *               free_group(&group) leaves in the pointer (NULL according
 *               to the note in the code, indicating failure)
 */
Group *
ComputeGroupInternally(Group *group)
{
  int n_matches = group->my_corpus->size;
  size_t n_cells = 0;     /* number of distinct (source, target) cells so far */
  int shown = -1;         /* last percentage sent to the progress bar */
  int m;

  /* ---------------------------------------------------------------------- */

  if (progress_bar)
    progress_bar_clear_line();

  EvaluationIsRunning = 1;

  /* the flag is re-tested before every match, so a user abort (Ctrl-C)
     ends the loop before the next match is processed */
  for (m = 0; m < n_matches && EvaluationIsRunning; m++) {
    ID_Count_Mapping probe;
    ID_Count_Mapping *cell;

    if (progress_bar) {
      int now = floor(0.5 + (100.0 * m) / n_matches);
      if (now > shown) {
        shown = now;
        progress_bar_percentage(1, 2, shown);
      }
    }

    probe.s = get_group_id(group, m, 0);   /* source ID */
    probe.t = get_group_id(group, m, 1);   /* target ID */
    probe.freq = 0;

    cell = binsert_g(&probe,
                     (void **) &(group->count_cells),
                     &n_cells,
                     sizeof(ID_Count_Mapping),
                     compare_st_cells);
    cell->freq++;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Group operation aborted by user.");
    if (which_app == cqp)
      install_signal_handler();
    free_group(&group);   /* sets return value to NULL to indicate failure */
  }
  else {
    group->nr_cells = sum_freqs(group->count_cells, n_cells, group->cutoff_frequency);

    if (progress_bar)
      progress_bar_clear_line();

    /* give back the memory of the cells removed by the cutoff */
    if (group->nr_cells < n_cells)
      group->count_cells = cl_realloc(group->count_cells,
                                      (group->nr_cells * sizeof(ID_Count_Mapping)));
  }

  EvaluationIsRunning = 0;

  return group;
}