Esempio n. 1
0
/* Make sure that a line of input is terminated by a semicolon.
   Trailing blanks, tabs and newlines are skipped when looking for the last
   significant character; a line made up of such whitespace only is cut down
   to the empty string.  If the last significant character is not a ';', one
   is appended -- by overwriting trailing whitespace where there is some,
   otherwise by enlarging the buffer with cl_realloc().
   Returns the pointer the caller has to use from now on (either the argument
   itself or the re-allocated buffer); NULL and "" are handed back untouched. */
char *
ensure_semicolon (char *line) {
  int len, last;

  if (line == NULL)
    return (line);

  len = strlen(line);
  if (len <= 0)
    return (line);

  /* walk backwards to the last character that is not trailing whitespace */
  for (last = len - 1; last >= 0; last--) {
    char c = line[last];
    if (c != ' ' && c != '\t' && c != '\n')
      break;
  }

  if (last < 0) {
    /* nothing but whitespace -> replace by the empty string */
    line[0] = 0;
    return (line);
  }

  if (line[last] == ';')
    return (line);              /* already terminated properly */

  if (last + 1 >= len) {
    /* no trailing whitespace to recycle: grow the buffer by one character */
    line = cl_realloc(line, len + 2);
    line[len] = ';';
    line[len + 1] = 0;
  }
  else {
    /* overwrite the first trailing whitespace character and cut the rest off */
    line[last + 1] = ';';
    line[last + 2] = 0;
  }

  return (line);
}
Esempio n. 2
0
/*
 * Log the command line the program was invoked with as a single
 * "Invoked: ..." message at LOG_INFO level.
 *
 * Every argument is copied followed by one space.  The scan stops after
 * argc entries or at the first NULL entry of argv, whichever comes first.
 * Nothing is logged if there is no argument to report (argc == 0, argv is
 * NULL, or no non-NULL entry was found before the scan stopped).
 */
void
cl_log_args(int argc, char **argv)
{
	int lpc = 0;
	int written = 0;
	size_t used = 0;	/* characters stored in arg_string so far, EOS excluded */
	char *arg_string = NULL;

	if(argc == 0 || argv == NULL) {
	    return;
	}
	
	for(;lpc < argc; lpc++) {
		if(argv[lpc] == NULL) {
			break;
		}
		
		/* +1 space, +1 EOS */
		arg_string = cl_realloc(arg_string, used + strlen(argv[lpc]) + 2);

		/* sprintf() reports how many characters it stored, so the
		 * length of the string built so far never has to be re-measured */
		written = sprintf(arg_string + used, "%s ", argv[lpc]);
		if(written > 0) {
			used += (size_t)written;
		}
	}

	if(arg_string == NULL) {
		/* no argument was collected; passing NULL for "%s" would be
		 * undefined behaviour, so treat this like the argc == 0 case */
		return;
	}

	cl_log(LOG_INFO, "Invoked: %s", arg_string);
	cl_free(arg_string);
}
Esempio n. 3
0
void
AddEquiv(int L, int R)
{
  int E; 
  State SL, SR;

  L = STab[L].Class;
  R = STab[R].Class;

  if (L == R) return;
  if (L > R)
    { 
      L ^= R;
      R ^= L;
      L ^= R;
    }

  SL = &STab[L];
  SR = &STab[R];

  for (E = 0; E < Es; E++)
    if (SL == ETab[E].L && SR == ETab[E].R) 
      return;
  if (Es >= EMax)     {
    EMax += 8;
    ETab = cl_realloc(ETab, sizeof *ETab * EMax);
  }
  ETab[Es].L = SL;
  ETab[Es].R = SR;
  Es++;
}
Esempio n. 4
0
/* Append a string to the NULL-terminated completion list.
   The list stores the pointer itself (no copy is made), which is why the
   string must have been allocated by the caller. */
void
cc_compl_list_add(char *string) {
  /* one slot is always kept in reserve for the NULL marker that terminates
     the list, so the list is extended as soon as only that slot is left */
  if (cc_compl_list_allocated - 1 <= cc_compl_list_size) {
    cc_compl_list_allocated += CC_COMPL_LIST_ALLOC_BLOCK;
    cc_compl_list = (char **) cl_realloc(cc_compl_list,
                                         cc_compl_list_allocated * sizeof(char *));
  }

  cc_compl_list[cc_compl_list_size] = string;
  cc_compl_list_size++;
  cc_compl_list[cc_compl_list_size] = NULL;
}
Esempio n. 5
0
/*
 * Add a string to the item list of a variable.
 *
 * Nothing happens if VariableItemMember() reports that the item is already
 * present.  Otherwise the variable is marked as not valid and a copy of the
 * string (cl_strdup) is stored in the first slot flagged as free; if there
 * is no such slot, the item array is enlarged by ITEM_REALLOC slots, the
 * copy goes into the first new slot and the remaining new slots are
 * initialised as free.
 *
 * Always returns 1.
 */
int
VariableAddItem(Variable v, char *item)
{
  int slot, k;

  if (VariableItemMember(v, item))
    return 1;

  v->valid = 0;

  /* look for the first slot that can be re-used */
  slot = 0;
  while (slot < v->nr_items && !v->items[slot].free)
    slot++;

  if (slot < v->nr_items) {
    v->items[slot].sval = cl_strdup(item);
    v->items[slot].free = 0;
    v->items[slot].ival = -1;
    return 1;
  }

  /* no free slot in the list: extend it (slot is the first new position) */
  v->nr_items += ITEM_REALLOC;

  if (v->items != NULL)
    v->items = (VariableItem *)cl_realloc(v->items,
                                          sizeof(VariableItem) * v->nr_items);
  else
    v->items = (VariableItem *)cl_malloc(sizeof(VariableItem) * v->nr_items);

  if (v->items == NULL) {
    fprintf(stderr, "Fatal Error #6: no memory left.");
    perror("Memory fault");
    assert(0 && "Big Problem here!");
  }

  v->items[slot].sval = cl_strdup(item);
  v->items[slot].free = 0;
  v->items[slot].ival = -1;

  /* all other new slots start out unused */
  for (k = slot + 1; k < v->nr_items; k++) {
    v->items[k].sval = NULL;
    v->items[k].free = 1;
    v->items[k].ival = -1;
  }

  return 1;
}
Esempio n. 6
0
/*
 * Add the value Q to the RHS list kept in IBuf for the symbol LHS.
 *
 * IBuf is kept ordered by strcmp() on the symbol names; if no entry with
 * the name of LHS exists, a new empty one is inserted at the right place
 * (IBuf grows in steps of 8 entries).  The RHS list of an entry is kept in
 * ascending order, grows in steps of 8 elements, and a value that is
 * already in the list is not inserted a second time.
 */
void
AddBuf(Symbol LHS, int Q)
{
  int pos, at, k, cmp;
  int known = 0;
  Item entry;
  char *Name = LHS->Name;

  /* locate the entry for this name, or the position where it belongs */
  for (pos = 0; pos < Is; pos++) {
    cmp = strcmp(IBuf[pos].LHS->Name, Name);
    if (cmp >= 0) {
      known = (cmp == 0);
      break;
    }
  }

  if (!known) {
    if (Is >= IMax) {
      IMax += 8;
      IBuf = cl_realloc(IBuf, sizeof *IBuf * IMax);
    }
    /* shift the tail up by one to open a gap at pos */
    for (k = Is; k > pos; k--)
      IBuf[k] = IBuf[k - 1];
    Is++;

    IBuf[pos].LHS = LHS;
    IBuf[pos].Size = 0;
    IBuf[pos].RHS = 0;
  }

  entry = &IBuf[pos];

  /* find the insertion point for Q; give up if Q is already listed */
  at = 0;
  while (at < entry->Size) {
    if (entry->RHS[at] == Q)
      return;
    if (entry->RHS[at] > Q)
      break;
    at++;
  }

  /* the list is allocated in blocks of 8, so a new block is due whenever
     the current size is a multiple of 8 */
  if ((entry->Size & 7) == 0)
    entry->RHS = cl_realloc(entry->RHS, sizeof *entry->RHS * (entry->Size + 8));

  for (k = entry->Size; k > at; k--)
    entry->RHS[k] = entry->RHS[k - 1];
  entry->Size++;

  entry->RHS[at] = Q;
}
Esempio n. 7
0
/*
 * Push Q onto XStack, unless EquTab[Q].Stack is already set.
 * The flag is set to 1 when Q is pushed; XStack grows by X_EXTEND entries
 * whenever it is full.
 */
void PushQ(int Q) 
{
  if (!EquTab[Q].Stack) {
    if (Xs == XMax) {
      XMax += X_EXTEND;
      XStack = cl_realloc(XStack, sizeof *XStack * XMax);
    }

    XStack[Xs] = Q;
    Xs++;
    EquTab[Q].Stack = 1;
  }
}
Esempio n. 8
0
/*
 * Create a new variable with the given name and register it in
 * VariableSpace.
 *
 * The new object holds its own copy of the name (cl_strdup), has no corpus,
 * no attribute and no items, and is marked as not valid.  It is stored in
 * the first NULL slot of VariableSpace; if there is none, VariableSpace is
 * enlarged by VARIABLE_REALLOC slots and the unused new slots are set to
 * NULL.
 *
 * Returns the new variable, or NULL if varname is NULL.
 */
Variable
NewVariable(char *varname)
{
  Variable v;
  int slot, k;

  if (varname == NULL)
    return NULL;

  v = (Variable)cl_malloc(sizeof(VariableBuffer));
  v->my_name = cl_strdup(varname);
  v->valid = 0;
  v->my_corpus = NULL;
  v->my_attribute = NULL;
  v->items = NULL;
  v->nr_items = 0;

  /* look for an empty slot in the existing table */
  slot = 0;
  while (slot < nr_variables && VariableSpace[slot] != NULL)
    slot++;

  if (slot < nr_variables) {
    VariableSpace[slot] = v;
    return v;
  }

  /* table is full (or does not exist yet): extend it; slot is the index of
     the first new position */
  nr_variables += VARIABLE_REALLOC;

  if (VariableSpace != NULL)
    VariableSpace = (Variable *)cl_realloc(VariableSpace,
                                           nr_variables * sizeof(Variable));
  else
    VariableSpace = (Variable *)cl_malloc(nr_variables * sizeof(Variable));

  if (VariableSpace == NULL) {
    fprintf(stderr, "Fatal Error: Variable space out of memory.\n");
    assert(0 && "Sorry, big problem here!");
  }

  VariableSpace[slot] = v;

  for (k = slot + 1; k < nr_variables; k++)
    VariableSpace[k] = NULL;

  return v;
}
Esempio n. 9
0
/*
 * Look up the state list SList (States elements) in STab and return the
 * index of its entry, adding a new entry if there is none yet.
 *
 * If an entry with an identical list is found, SList is released with
 * free() and the index of the existing entry is returned.  Otherwise SList
 * itself is stored in a new entry (no copy is made), whose Class is set to
 * its own index; STab grows in steps of 8 entries.
 */
int
AddState(int States, int *SList)
{
  int idx, k, id;
  State cand;

  for (idx = 0; idx < Ss; idx++) {
    cand = &STab[idx];
    if (cand->States == States) {
      /* same length: compare element by element */
      k = 0;
      while (k < States && SList[k] == cand->SList[k])
        k++;
      if (k >= States) {
        free(SList);
        return idx;
      }
    }
  }

  /* TODO
   * The cl_realloc() below may move the state table to a different memory
   * location if it cannot be expanded in place, which breaks any pointers
   * into the table held in local variables of the calling function.
   * AddState() is only called from FormState(), in a loop that modifies
   * "embedded" variables, so the bug only surfaces if the original memory
   * location is overwritten immediately (while the loop is still running).
   * It can be triggered reliably by the query
   *     ([pos = "IN|TO"] [pos = "DT.*"]? [pos = "JJ.*"]* [pos = "N.*"]+){3};
   * on a PowerPC G4 running Mac OS X 10.4.
   * To avoid the problem, local pointers into STab[] should be updated
   * after every call to AddState(). */
  if ((Ss & 7) == 0)
    STab = cl_realloc(STab, sizeof *STab * (Ss + 8));

  id = Ss;
  STab[id].Class = id;
  STab[id].States = States;
  STab[id].SList = SList;
  Ss++;

  return id;
}
Esempio n. 10
0
File: groups.c Progetto: cran/rcqp
/**
 * Compute the cells of a group with the help of an external command.
 *
 * The (source ID, target ID) pair of every position of the group's corpus is
 * written to a temporary file, one pair per line.  The command obtained by
 * inserting the name of that file into the template ExternalGroupingCommand
 * is then run through popen(), and its output is read as triples
 * "frequency source-ID target-ID".  Every triple whose frequency is greater
 * than the group's cutoff frequency is appended to group->count_cells
 * (which grows in blocks of GROUP_REALLOC cells).
 *
 * Unless GROUP_DEBUG is set, the temporary file is removed afterwards.
 *
 * Problems (temporary file cannot be created, command does not fit into the
 * command buffer, pipe cannot be opened) are reported as warnings; the group
 * is returned in whatever state it is in at that point.
 *
 * @param group  The group to fill.
 * @return       The same group pointer that was passed in.
 */
Group *
ComputeGroupExternally(Group *group)
{
  int i;
  int size = group->my_corpus->size;
  int cutoff_freq = group->cutoff_frequency;
  int cmd_len;

  char temporary_name[TEMP_FILENAME_BUFSIZE];
  FILE *fd;
  FILE *pipe = NULL;
  char sort_call[CL_MAX_LINE_LENGTH];

  /* ---------------------------------------------------------------------- */

  if ((fd = open_temporary_file(temporary_name)) == NULL) {
    perror("Error while opening temporary file");
    cqpmessage(Warning, "Can't open temporary file");
    return group;
  }

  for (i = 0; i < size; i++) {
    fprintf(fd, "%d %d\n", get_group_id(group, i, 0), get_group_id(group, i, 1)); /* (source ID, target ID) */
  }
  fclose(fd);

  /* construct sort call; the command template is configurable, so the result
   * must not be allowed to run past the end of the buffer */
  cmd_len = snprintf(sort_call, sizeof(sort_call), ExternalGroupingCommand, temporary_name);
  if (cmd_len < 0 || (size_t)cmd_len >= sizeof(sort_call)) {
    /* a truncated command must not be executed */
    cqpmessage(Warning, "External grouping command is too long (temporary file %s)",
               temporary_name);
  }
  else {
    if (GROUP_DEBUG)
     Rprintf( "Running grouping sort: \n\t%s\n",
              sort_call);
    if ((pipe = popen(sort_call, "r")) == NULL) {
      perror("Failure opening grouping pipe");
      cqpmessage(Warning, "Can't open grouping pipe:\n%s\n"
                 "Disable external grouping by\n"
                 "  set UseExternalGrouping off;", 
                 sort_call);
    }
  }

  if (pipe != NULL) {
    int freq, p1, p2, tokens;
#define GROUP_REALLOC 16

    while ((tokens = fscanf(pipe, "%d%d%d", &freq, &p1, &p2)) == 3) {
      if (freq > cutoff_freq) {
        /* the cell array is allocated in blocks of GROUP_REALLOC cells, so a
         * new block is needed whenever the cell count is a multiple of it */
        if ((group->nr_cells % GROUP_REALLOC) == 0) {
          if (group->count_cells == NULL) {
            group->count_cells = 
              (ID_Count_Mapping *)cl_malloc(GROUP_REALLOC *
                                         sizeof(ID_Count_Mapping));
          }
          else {
            group->count_cells = 
              (ID_Count_Mapping *)cl_realloc(group->count_cells,
                                          (group->nr_cells + GROUP_REALLOC) *
                                          sizeof(ID_Count_Mapping));
          }
          assert(group->count_cells);
        }

        group->count_cells[group->nr_cells].s = p1;
        group->count_cells[group->nr_cells].t = p2;
        group->count_cells[group->nr_cells].freq = freq;

        group->nr_cells = group->nr_cells + 1;
      }
    }

    /* the loop ends on the first read that does not yield three numbers;
     * anything other than EOF means there was unparsable output */
    if (tokens != EOF) {
     Rprintf( "Warning: could not reach EOF of temporary file!\n");
    }

    pclose(pipe);
  }

  if (GROUP_DEBUG) {
   Rprintf( "Keeping temporary file %s -- delete manually\n",
            temporary_name);
  }
  else if (unlink(temporary_name) != 0) {
    perror(temporary_name);
   Rprintf( "Can't remove temporary file %s -- \n\tI will continue, "
            "but you should remove that file.\n", temporary_name);
  }
  
  return group;
}
Esempio n. 11
0
File: groups.c Progetto: cran/rcqp
/**
 * Compute the cells of a group inside this process.
 *
 * For every position of the group's corpus the (source ID, target ID) pair
 * is looked up in -- or inserted into -- group->count_cells with binsert_g(),
 * and the frequency of the resulting cell is incremented.  When all
 * positions have been processed, sum_freqs() is called with the cutoff
 * frequency and its result becomes the number of cells; if that is smaller
 * than the number of cells collected, the cell array is shrunk accordingly.
 *
 * The loop stops as soon as EvaluationIsRunning is found to be cleared
 * (user abort); in that case a warning is issued and the group is released
 * with free_group().
 *
 * @param group  The group to fill.
 * @return       The group, or whatever free_group() left in the pointer
 *               after an abort (the original code notes this is NULL).
 */
Group *
ComputeGroupInternally(Group *group)
{
  ID_Count_Mapping probe;
  ID_Count_Mapping *cell;

  int pos;
  size_t n_cells = 0;
  int shown_pct = -1, pct;      /* for ProgressBar */
  int size = group->my_corpus->size;

  /* ---------------------------------------------------------------------- */

  if (progress_bar)
    progress_bar_clear_line();

  EvaluationIsRunning = 1;

  /* the flag is re-checked before every position to catch a user abort (Ctrl-C) */
  for (pos = 0; pos < size && EvaluationIsRunning; pos++) {

    if (progress_bar) {
      pct = floor(0.5 + (100.0 * pos) / size);
      if (pct > shown_pct) {
        shown_pct = pct;
        progress_bar_percentage(1, 2, shown_pct);
      }
    }

    probe.s = get_group_id(group, pos, 0);    /* source ID */
    probe.t = get_group_id(group, pos, 1);    /* target ID */
    probe.freq = 0;

    cell = binsert_g(&probe,
                     (void **) &(group->count_cells),
                     &n_cells,
                     sizeof(ID_Count_Mapping),
                     compare_st_cells);

    cell->freq++;
  }

  if (!EvaluationIsRunning) {
    cqpmessage(Warning, "Group operation aborted by user.");
    if (which_app == cqp) install_signal_handler();
    free_group(&group);         /* sets return value to NULL to indicate failure */
  }
  else {
    group->nr_cells = sum_freqs(group->count_cells, n_cells, group->cutoff_frequency);

    if (progress_bar)
      progress_bar_clear_line();

    /* give back the part of the array that is no longer in use */
    if (group->nr_cells < n_cells)
      group->count_cells =
        cl_realloc(group->count_cells, (group->nr_cells * sizeof(ID_Count_Mapping)));
  }
  EvaluationIsRunning = 0;

  return group;
}
Esempio n. 12
0
/**
 * Creates a Mapping from a file.
 *
 * Each line in the file results in a SingleMapping (unless it begins in #,
 * in which case it either indicates the name of the mapping or is a comment).
 *
 * Within a single line, the first white-space delimited token represents
 * the name of the class, and the other tokens are attribute values.
 *
 * Any parse failure in the file will stop the entire Mapping-creation process
 * and result in NULL being returned.
 *
 * @param corpus        The corpus for which the Mapping is valid (pointer).
 * @param attr_name     String naming the attribute for which the mapping is valid.
 * @param file_name     The filename of the map spec.
 * @param error_string  A char * (not char[]), which is set to an error
 *                      string, or to NULL if all is OK. The error strings are
 *                      string literals and must not be freed by the caller.
 * @return              The resulting Mapping object, or NULL in case of error.
 */
Mapping
read_mapping(Corpus *corpus,
             char *attr_name,
             char *file_name,
             char **error_string)
{
  FILE *fd;
  Attribute *attr;
  Mapping m = NULL;
  char s[CL_MAX_LINE_LENGTH];

  if (corpus == NULL) {
    *error_string = "corpus argument missing";
    return NULL;
  }

  if (attr_name == NULL) {
    *error_string = "attribute name argument missing";
    return NULL;
  }

  if ((attr = find_attribute(corpus, attr_name, ATT_POS, NULL)) == NULL) {
    *error_string = "no such attribute in corpus";
    return NULL;
  }

  if ((fd = fopen(file_name, "r")) == NULL) {
    *error_string = "Can't open mapping file";
    return NULL;
  }

  m = cl_malloc(sizeof(MappingRecord));

  m->corpus = corpus;
  m->mapping_name = NULL;
  m->attribute = attr;          /* known to be non-NULL, see check above */
  m->nr_classes = 0;
  m->classes = NULL;

  /* Every error below releases the mapping with drop_mapping(&m); the loop
   * conditions and the "if (m)" guards rely on m no longer being set after
   * that call, so parsing stops after the first error. */

  while ( m  &&  fgets(s, CL_MAX_LINE_LENGTH, fd) != NULL ) {

    /* strip the newline that fgets() leaves at the end of the line */
    size_t line_len = strlen(s);

    if (line_len > 0 && s[line_len - 1] == '\n')
      s[line_len - 1] = '\0';

    /* NB. The following if-else takes up all the rest of this while-loop. */
    if (s[0] == '#') {

      /* lines beginning with # */

      /* if this line begins with the NAME_TOKEN... */
      if (strncasecmp(s, NAME_TOKEN, strlen(NAME_TOKEN)) == 0) {

        /* set the name */

        if (m->mapping_name) {
          *error_string = "Multiple mapping names declared";
          drop_mapping(&m);
        }
        else if (!s[strlen(NAME_TOKEN)]) {
          *error_string = "Error in #NAME declaration";
          drop_mapping(&m);
        }
        else {
          /* the name is everything that follows the NAME_TOKEN */
          m->mapping_name = cl_strdup(s + strlen(NAME_TOKEN));
        }
      }

      /* everything else beginning with # is a comment  (and can thus be ignored) */

    }
    else if (s[0]) {

      /* lines NOT beginning with # */

      /* make new single mapping */

      char *token;
      SingleMappingRecord *this_class = NULL;

      token = strtok(s, " \t\n");

      if (token) {

        /* first token is class name, rest are attribute values */

        /* test: class 'token' already defined? */
        if (find_mapping(m, token) != NULL) {
          *error_string = "Class defined twice";
          drop_mapping(&m);
          break;
        }

        /* create new class; the class array is allocated in blocks of
         * CLASS_REALLOC_THRESHOLD records */

        if (m->nr_classes == 0) {
          m->classes =
            (SingleMappingRecord *)
            cl_malloc(sizeof(SingleMappingRecord) * CLASS_REALLOC_THRESHOLD);
        }
        else if (m->nr_classes % CLASS_REALLOC_THRESHOLD == 0) {
          m->classes =
            (SingleMappingRecord *)
            cl_realloc(m->classes,
                    sizeof(SingleMappingRecord) *
                    (m->nr_classes + CLASS_REALLOC_THRESHOLD));
        }
        /* else there is enough memory for this new class already! */

        if (m->classes == NULL) {
          *error_string = "Memory allocation failure";
          drop_mapping(&m);
        }
        else {
          m->classes[m->nr_classes].class_name = cl_strdup(token);
          m->classes[m->nr_classes].nr_tokens = 0;
          m->classes[m->nr_classes].tokens = NULL;

          this_class = &(m->classes[m->nr_classes]);
        }

        /* create single mappings : loop through remaining tokens on this line */

        while (m &&
               (token = strtok(NULL, " \t\n"))) {

          int id;

          /* test: token member of attribute values of my attribute? */

          id = get_id_of_string(attr, token);

          if (id < 0 || cderrno != CDA_OK) {
            *error_string = "token not member of attribute";
            drop_mapping(&m);
            break;
          }

          /* test: token already member of any class? */

          if (map_token_to_class(m, token) != NULL) {
            *error_string = "token member of several classes";
            drop_mapping(&m);
            break;
          }
          else if (this_class->tokens) {
            int i;

            /* the class under construction is not counted in nr_classes yet,
             * so its own tokens are checked separately here */
            for (i = 0; i < this_class->nr_tokens; i++)
              if (this_class->tokens[i] == id) {
                *error_string = "token member of several classes";
                drop_mapping(&m);
                break;          /* leaves the for loop only; "if (m)" below does the rest */
              }
          }

          /* having passed all the tests, put token id into this mapping */

          if (m) {

            /* the token array is allocated in blocks of
             * TOKEN_REALLOC_THRESHOLD ints */
            if (this_class->nr_tokens == 0) {
              this_class->tokens =
                (int *)
                cl_malloc(sizeof(int) * TOKEN_REALLOC_THRESHOLD);
            }
            else if (this_class->nr_tokens % TOKEN_REALLOC_THRESHOLD == 0) {

              this_class->tokens =
                (int *)
                cl_realloc(this_class->tokens,
                        sizeof(int) * (this_class->nr_tokens +
                                       TOKEN_REALLOC_THRESHOLD));
            }

            if (this_class->tokens == NULL) {
              *error_string = "Memory allocation failure";
              drop_mapping(&m);
            }
            else {
              this_class->tokens[this_class->nr_tokens] = id;
              this_class->nr_tokens++;
            }
          }
        } /* endwhile (loop for each token on a line) */

        if (m) {

          m->nr_classes++;

          /* sort token IDs in increasing order */

          qsort(this_class->tokens,
                this_class->nr_tokens,
                sizeof(int),
                intcompare);

        }
      }
    }
  } /* endwhile (main loop for each line in the mapping file) */

  fclose(fd);

  /* honour the documented contract: no error string if the mapping was
   * created successfully */
  if (m)
    *error_string = NULL;

  return m;
}
Esempio n. 13
0
/**
 * Perform "operation" on the two match lists (can be initial).
 *
 * The result is assigned to list1.
 *
 *
 * this whole code is WRONG when one of the matchlists is inverted
 * TODO!
 *
 * Also TODO: give it a better name.
 *
 * This contains, by far, most of the code in the Matchlist module.
 */
int
Setop(Matchlist *list1, MLSetOp operation, Matchlist *list2)
{
  int i, j, k, t, ins;
  Matchlist tmp;
  Attribute *attr;

  switch (operation) {

  case Union:

    /*
     * -------------------- UNION
     */

    /*
     * TODO:
     * optimize in case 
     *   (list1->matches_whole_corpus && list2->matches_whole_corpus)
     */
    
    if (list2->start == NULL)

      if (list2->is_inverted) {
        /* l2 is empty, but inverted, so the result is the whole corpus,
         * as in l2. */
        return Setop(list1, Identity, list2);
      }
      else 
        /* the result is list1, so just return */
        return 1;

    else if (list1->start == NULL)

      if (list1->is_inverted)
        /* empty, but inverted --> whole corpus, l1 */
        return 1;
      else 
        /* the result is in list2, so return a copy */
        return Setop(list1, Identity, list2);

    else if (list1->is_inverted && list2->is_inverted) {

      /* union of 2 inverted lists is the inverted intersection */

      list1->is_inverted = 0; list2->is_inverted = 0;
      Setop(list1, Intersection, list2);
      list1->is_inverted = 1;
      
    }
    else {

      if (list1->is_inverted) {
        list1->is_inverted = 0;
        Setop(list1, Complement, NULL);
      }
      if (list2->is_inverted) {
        list2->is_inverted = 0;
        Setop(list2, Complement, NULL);
      }

      tmp.tabsize = list1->tabsize + list2->tabsize;

      tmp.start = (int *)cl_malloc(sizeof(int) * tmp.tabsize);

      if (list1->end && list2->end)
        tmp.end   = (int *)cl_malloc(sizeof(int) * tmp.tabsize);
      else
        tmp.end = NULL;

      if (list1->target_positions && list2->target_positions)
        tmp.target_positions = (int *)cl_malloc(sizeof(int) * tmp.tabsize);
      else
        tmp.target_positions = NULL;





      i = 0;                        /* the position in list1 */
      j = 0;                        /* the position in list2 */
      k = 0;                        /* the insertion point in the result list `tmp' */


      while ((i < list1->tabsize) || (j < list2->tabsize))

        if ((i < list1->tabsize) && (list1->start[i] == -1))
          i++;
        else if ((j < list2->tabsize) && (list2->start[j] == -1))
          j++;
        else if ((j >= list2->tabsize) ||
            ((i < list1->tabsize) && (list1->start[i] < list2->start[j]))) {

          /* copy (remaining) item from list1 */

          tmp.start[k] = list1->start[i];

          if (tmp.end)
            tmp.end[k] = list1->end[i];

          if (tmp.target_positions)
            tmp.target_positions[k] = list1->target_positions[i];

          k++;
          i++;

        }
        else if ((i >= list1->tabsize) ||
                 ((j < list2->tabsize) && (list1->start[i] > list2->start[j]))) {

          /* copy (remaining) item from list2 */
          tmp.start[k] = list2->start[j];

          if (tmp.end)
            tmp.end[k] = list2->end[j];

          if (tmp.target_positions)
            tmp.target_positions[k] = list2->target_positions[j];

          k++;
          j++;

        }
        else {

          /* both start positions are identical. Now check whether the end
           * positions are also the same => the ranges are identical and
           * the duplicate is to be eliminated.
           */

          tmp.start[k] = list1->start[i];

          if ((tmp.end == NULL) || (list1->end[i] == list2->end[j])) {

            /* real duplicate, copy once */

            if (tmp.end)
              tmp.end[k]   = list1->end[i];

            if (tmp.target_positions)
              tmp.target_positions[k]   = list1->target_positions[i];

            i++;
            j++;

          }
          else {

            /*
             * we have existing, non-equal end positions. copy the smaller one.
             */

            if (list1->end[i] < list2->end[j]) {
              tmp.end[k]   = list1->end[i];

              if (tmp.target_positions)
                tmp.target_positions[k] = list1->target_positions[i];

              i++;
            }
            else {
              tmp.end[k]   = list2->end[j];

              if (tmp.target_positions)
                tmp.target_positions[k] = list2->target_positions[j];

              j++;
            }

          }
          k++;
        }

      assert(k <= tmp.tabsize);

      /* we did not eliminate any duplicates if k==tmp.tabsize. 
       * So, in that case, we do not have to bother with reallocs.
       */
      
      if (k < tmp.tabsize) {
        tmp.start = (int *)cl_realloc((char *)tmp.start, sizeof(int) * k);
        if (tmp.end)
          tmp.end = (int *)cl_realloc((char *)tmp.end, sizeof(int) * k);
        if (tmp.target_positions)
          tmp.target_positions = (int *)cl_realloc((char *)tmp.target_positions, sizeof(int) * k);
      }

      cl_free(list1->start);
      cl_free(list1->end);
      cl_free(list1->target_positions);

      list1->start = tmp.start; tmp.start = NULL;
      list1->end   = tmp.end;   tmp.end = NULL;
      list1->target_positions = tmp.target_positions;   tmp.target_positions = NULL;
      list1->tabsize = k;
      list1->matches_whole_corpus = 0;
      list1->is_inverted = 0;
    }

    break;

  case Intersection:

    /*
     * -------------------- INTERSECTION
     */

    if (list1->tabsize == 0 && list1->is_inverted)

      /* l1 matches whole corpus, so intersection is equal to l2 */
      return Setop(list1, Identity, list2);

    else if (list2->tabsize == 0 && list2->is_inverted)
      /* l2 matches whole corpus, so intersection is equal to l1 */
      return 1;

    else if ((list1->tabsize == 0) || (list2->tabsize == 0)) {

      /*
       * Bingo. one of the two is empty AND NOT INVERTED. So
       * the intersection is also empty.
       */
      
      cl_free(list1->start);
      cl_free(list1->end);
      cl_free(list1->target_positions);
      list1->tabsize = 0;
      list1->matches_whole_corpus = 0;
      list1->is_inverted = 0;
      
    }
    else if (list1->is_inverted && list2->is_inverted) {
      
      /* intersection of 2 inverted lists is the inverted union */

      list1->is_inverted = 0; list2->is_inverted = 0;
      Setop(list1, Union, list2);
      list1->is_inverted = 1;
      
    }
    else {

      /*
       * Two non-empty lists. ONE of both may be inverted.
       * We have to do some work then
       */

      if (list1->is_inverted)
        tmp.tabsize = list2->tabsize;
      else if (list2->is_inverted)
        tmp.tabsize = list1->tabsize;
      else
        tmp.tabsize = MIN(list1->tabsize, list2->tabsize);

      tmp.start = (int *)cl_malloc(sizeof(int) * tmp.tabsize);

      if (list1->end && list2->end)
        tmp.end   = (int *)cl_malloc(sizeof(int) * tmp.tabsize);
      else
        tmp.end = NULL;

      if (list1->target_positions && list2->target_positions)
        tmp.target_positions = (int *)cl_malloc(sizeof(int) * tmp.tabsize);
      else
        tmp.target_positions = NULL;


      i = 0;                        /* the position in list1 */
      j = 0;                        /* the position in list2 */
      k = 0;                        /* the insertion point in the result list */

      while ((i < list1->tabsize) && (j < list2->tabsize))

        if (list1->start[i] < list2->start[j])
          i++;
        else if (list1->start[i] > list2->start[j])
          j++;
        else {

          /* both start positions are identical. Now check whether the end
           * positions are also the same => the ranges are identical and
           * one version is to be copied.
           */

          if ((tmp.end == NULL) || (list1->end[i] == list2->end[j])) {

            /* real duplicate, copy once */

            tmp.start[k] = list1->start[i];

            if (tmp.end)
              tmp.end[k]   = list1->end[i];

            if (tmp.target_positions)
              tmp.target_positions[k]   = list1->target_positions[i];

            i++;
            j++;
            k++;
          }
          else {

            /*
             * we have existing, non-equal end positions. Advance on
             * list with the smaller element.
             */

            if (list1->end[i] < list2->end[j])
              i++;
            else
              j++;
          }
        }

      assert(k <= tmp.tabsize);

      if (k == 0) {
        /* we did not copy anything. result is empty. */
        cl_free(tmp.start); tmp.start = NULL;
        cl_free(tmp.end);   tmp.end   = NULL;
        cl_free(tmp.target_positions); tmp.target_positions = NULL;
      }
      else if (k < tmp.tabsize) {

        /* we did not eliminate any duplicates if k==tmp.tabsize.
         * So, in that case, we do not have to bother with reallocs.
         */
      
        tmp.start = (int *)cl_realloc((char *)tmp.start, sizeof(int) * k);
        if (tmp.end)
          tmp.end = (int *)cl_realloc((char *)tmp.end, sizeof(int) * k);
        if (tmp.target_positions)
          tmp.target_positions = (int *)cl_realloc((char *)tmp.target_positions, sizeof(int) * k);
      }

      cl_free(list1->start);
      cl_free(list1->end);
      cl_free(list1->target_positions);

      list1->start = tmp.start; tmp.start = NULL;
      list1->end   = tmp.end;   tmp.end = NULL;

      list1->target_positions   = tmp.target_positions;
      tmp.target_positions = NULL;

      list1->tabsize = k;
      list1->matches_whole_corpus = 0;
      list1->is_inverted = 0;
    }

    break;

  case Complement:

    /*
     * -------------------- COMPLEMENT 
     * in that case. ML2 should be empty. We suppose it is.
     */

    /* 
     * what the hell is the complement of a non-initial matchlist?
     * I simply do not know. so do it only for initial ones.
     */
    
    if (list1->end) {
      fprintf(stderr, "Can't calculate complement for non-initial matchlist.\n");
      return 0;
    }

    /* we could always make the complement by toggling the inversion flag,
     * but we only do that in case the list is inverted, otherwise we would
     * need another function to physically make the complement
     */

    if (list1->is_inverted) {
      list1->is_inverted = 0; 
      return 1;
    }

    if (!evalenv) {
      fprintf(stderr, "Can't calculate complement with NULL eval env\n");
      return 0;
    }

    if (!evalenv->query_corpus) {
      fprintf(stderr, "Can't calculate complement with NULL query_corpus.\n");
      return 0;
    }

    if (!access_corpus(evalenv->query_corpus)) {
      fprintf(stderr, "Complement: can't access current corpus.\n");
      return 0;
    }

    /* 
     * OK. The tests went by. Now, the size of the new ML is the 
     * size of the corpus MINUS the size of the current matchlist.
     */

    if ((attr = find_attribute(evalenv->query_corpus->corpus,
                               DEFAULT_ATT_NAME, ATT_POS, NULL)) == NULL) {
      fprintf(stderr, "Complement: can't find %s attribute of current corpus\n",
              DEFAULT_ATT_NAME);
      return 0;
    }

    i = cl_max_cpos(attr);
    if (cl_errno != CDA_OK) {
      fprintf(stderr, "Complement: can't get attribute size\n");
      return 0;
    }

    tmp.tabsize = i - list1->tabsize;

    if (tmp.tabsize == 0) {

      /*
       * Best case. Result is empty.
       */

      cl_free(list1->start);
      cl_free(list1->end);
      cl_free(list1->target_positions);
      list1->matches_whole_corpus = 0;
      list1->tabsize = 0;
      list1->is_inverted = 0;
    }
    else if (tmp.tabsize == i) {

      /*
       * Worst case. 
       * result is a copy of the corpus.
       *
       * TODO: This is not true if we have -1 elements in the source list.
       *
       */

      cl_free(list1->start);
      cl_free(list1->end);
      cl_free(list1->target_positions);

      list1->start = (int *)cl_malloc(sizeof(int) * tmp.tabsize);
      list1->tabsize = tmp.tabsize;
      list1->matches_whole_corpus = 1;
      list1->is_inverted = 0;

      for (i = 0; i < tmp.tabsize; i++)
        list1->start[i] = i;
    }
    else {

      /*
       * in between.
       */
      
      tmp.start = (int *)cl_malloc(sizeof(int) * tmp.tabsize);
      tmp.end = NULL;
      tmp.target_positions = NULL;
      tmp.matches_whole_corpus = 0;

      j = 0;                        /* index in source list */
      t = 0;                        /* index in target list */
      for (k = 0; k < i; k++) {
        if ((j >= list1->tabsize) || (k < list1->start[j])) {
          tmp.start[t] = k;
          t++;
        }
        else if (k == list1->start[j]) {
          j++;
        }
        else /* (k > list1->start[j]) */ {
          assert("Error in Complement calculation routine" && 0);
        }
      }
      assert(t == tmp.tabsize);

      cl_free(list1->start);
      cl_free(list1->end);
      cl_free(list1->target_positions);

      list1->start = tmp.start; tmp.start = NULL;
      list1->end   = tmp.end;   tmp.end = NULL;
      list1->tabsize = tmp.tabsize;
      list1->matches_whole_corpus = 0;
      list1->is_inverted = 0;
    }


    break;

  case Identity:

    /* 
     * -------------------- IDENTITY
     * create a copy of ML2 into ML1
     */

    free_matchlist(list1);

    list1->tabsize = list2->tabsize;
    list1->matches_whole_corpus = list2->matches_whole_corpus;
    list1->is_inverted = list2->is_inverted;

    if (list2->start) {
      list1->start = (int *)cl_malloc(sizeof(int) * list2->tabsize);
      memcpy((char *)list1->start, (char *)list2->start, sizeof(int) * list2->tabsize);
    }

    if (list2->end) {
      list1->end = (int *)cl_malloc(sizeof(int) * list2->tabsize);
      memcpy((char *)list1->end, (char *)list2->end, sizeof(int) * list2->tabsize);
    }

    if (list2->target_positions) {
      list1->target_positions = (int *)cl_malloc(sizeof(int) * list2->tabsize);
      memcpy((char *)list1->target_positions,
             (char *)list2->target_positions, sizeof(int) * list2->tabsize);
    }

    break;

  case Uniq:

    /* 
     * -------------------- UNIQ
     * create a unique version of ML1
     * working destructively on list1
     */

    if (list1->start && (list1->tabsize > 0)) {

      ins = 0;                        /* the insertion point */

      if (list1->end)

        for (i = 0; i < list1->tabsize; i++) {

          if ((ins == 0) ||
              ((list1->start[i] != list1->start[ins-1]) ||
               (list1->end[i] != list1->end[ins-1]))) {

            /* copy the data from the current position
             * down to the insertion point.
             */

            list1->start[ins] = list1->start[i];
            list1->end[ins]   = list1->end[i];
            if (list1->target_positions)
              list1->target_positions[ins]   = list1->target_positions[i];
            ins++;
          }
        }
      else
        for (i = 0; i < list1->tabsize; i++) {
          if ((ins == 0) || (list1->start[i] != list1->start[ins-1])) {

            /* copy the data from the current position
             * down to the insertion point.
             */

            list1->start[ins] = list1->start[i];
            if (list1->target_positions)
              list1->target_positions[ins]   = list1->target_positions[i];
            ins++;
          }
        }
    
      if (ins != list1->tabsize) {

        /*
         * no elements were deleted from the list when ins==tabsize. So
         * we do not have to do anything then.
         * Otherwise, the list was used destructively. Free up used space.
         */

        list1->start = (int *)cl_realloc(list1->start, sizeof(int) * ins);
        if (list1->end)
          list1->end = (int *)cl_realloc(list1->end,   sizeof(int) * ins);
        if (list1->target_positions)
          list1->target_positions = (int *)cl_realloc(list1->target_positions,   sizeof(int) * ins);
        list1->tabsize = ins;
        list1->matches_whole_corpus = 0;
        list1->is_inverted = 0;
      }
    }

    break;

  case Reduce:

    if ((list1->start) && (list1->tabsize > 0)) {
    
      ins = 0;

      /* for the sake of efficiency, we distinguish here between
       * initial matchlists and non-initial matchlists. Two almost
       * identical loops are performed, but we do the test for initial
       * mls instead of inside the loop here */
      
      if (list1->end)

        for (i = 0; i < list1->tabsize; i++) {

          if (list1->start[i] != -1) {

            /* copy the data from the current position
             * down to the insertion point.
             */

            if (i != ins) {
              list1->start[ins] = list1->start[i];
              list1->end[ins]   = list1->end[i];
              if (list1->target_positions)
                list1->target_positions[ins]   = list1->target_positions[i];
            }
            ins++;
          }
        }
      else
        for (i = 0; i < list1->tabsize; i++) {

          if (list1->start[i] != -1) {

            /* copy the data from the current position
             * down to the insertion point.
             */

            if (i != ins)
              list1->start[ins] = list1->start[i];
            if (list1->target_positions)
              list1->target_positions[ins]   = list1->target_positions[i];
            ins++;
          }
        }
      
      if (ins == 0) {

        /*
         * all elements have been deleted. So free the used space.
         */

        cl_free(list1->start);
        cl_free(list1->end);
        cl_free(list1->target_positions);
        list1->tabsize = 0;
        list1->matches_whole_corpus = 0;
        list1->is_inverted = 0;
      }
      else if (ins != list1->tabsize) {

        /*
         * no elements were deleted from the list when ins==tabsize. So
         * we do not have to do anything then.
         * Otherwise, the list was used destructively. Free up used space.
         */

        list1->start = (int *)cl_realloc(list1->start, sizeof(int) * ins);
        if (list1->end)
          list1->end = (int *)cl_realloc(list1->end,   sizeof(int) * ins);
        if (list1->target_positions)
          list1->target_positions = (int *)cl_realloc(list1->target_positions, sizeof(int) * ins);
        list1->tabsize = ins;
        list1->matches_whole_corpus = 0;
        list1->is_inverted = 0;
      }
    }
    break;
    
  default:
    assert("Illegal operator in Setop" && 0);
    return 0;
    break;
  }

  return 1;
}
Esempio n. 14
0
/**
 * Look up, or create, the expression node described by Tag and its operands.
 *
 * The variadic operands depend on Tag:
 *   - SymX:               one Symbol
 *   - ZeroX, OneX:        none
 *   - PlusX, StarX, OptX: one int (index into EquTab)
 *   - OrX, AndX:          two ints (indices into EquTab)
 *
 * The bucket ExpHash[H] is searched for an existing node with the same
 * operands. On a hit, that node's Class is returned; additionally, when Q is
 * a real equation index (Q != -1) different from that Class, EquTab[Q].Value
 * is pointed at the existing node.
 *
 * On a miss, a new node is allocated and pushed onto the front of its bucket.
 * If Q == -1 a fresh EquTab entry is appended (growing EquTab by EQU_EXTEND
 * entries when full) and its index becomes the node's Class; otherwise the
 * node is stored in EquTab[Q] and gets Class Q.
 *
 * @param Q    equation index to bind the expression to, or -1 to allocate one
 * @param Tag  kind of expression; determines the variadic operands (see above)
 * @return     the Class (EquTab index) of the found or newly created node
 */
int
MakeExp(int Q, ExpTag Tag, ...)
{

  va_list AP; 
  Symbol Sym = NULL; 

  int H = 0; 
  byte Args = 0; 
  Exp HP = NULL, E;             /* HP: existing node found in the bucket, if any */
  int Q0 = 0, Q1 = 0;

  va_start(AP, Tag);

  /* Fetch the operands, compute the bucket H and search it.  A hit leaves HP
   * pointing at the matching node; a miss leaves HP == NULL.  There are no
   * returns inside the switch, so that va_end() is reached on every path.
   *
   * NOTE(review): the bucket ranges (0, 1, 0x02.., 0x0c.., 0x20.., 0x40..,
   * 0x80.., 0x100..) look disjoint per tag provided the Hash values involved
   * stay below 0x200 -- confirm against where Hash is assigned. */
  switch (Tag) 
    {
    case SymX:
      Sym = va_arg(AP, Symbol);
      H = 0x100 + Sym->Hash; 
      Args = 0;
      for (HP = ExpHash[H]; HP != NULL; HP = HP->Tail)
        if (Sym == HP->Body.Leaf) 
          break;
      break;
    case ZeroX: 
      H = 0; 
      goto MakeNullary;
    case OneX: 
      H = 1; 
      goto MakeNullary;
    MakeNullary:
      Args = 0; 
      HP = ExpHash[H];          /* any node in this bucket counts as the match */
      break;
    case PlusX:
      Q0 = va_arg(AP, int); 
      H = 0x02 + EquTab[Q0].Hash*0x0a/0x200;
      goto MakeUnary;
    case StarX:
      Q0 = va_arg(AP, int); 
      H = 0x0c + EquTab[Q0].Hash*0x14/0x200;
      goto MakeUnary;
    case OptX:
      Q0 = va_arg(AP, int); 
      H = 0x20 + EquTab[Q0].Hash/0x10;
      /* fall through into the shared unary lookup */
    MakeUnary:
      Args = 1;
      for (HP = ExpHash[H]; HP != NULL; HP = HP->Tail)
        if (Q0 == HP->Body.Arg[0]) 
          break;
      break;
    case OrX:
      Q0 = va_arg(AP, int);
      Q1 = va_arg(AP, int);
      H = 0x40 + DUP(EquTab[Q0].Hash, EquTab[Q1].Hash)/8;
      goto MakeBinary;
    case AndX:
      Q0 = va_arg(AP, int);
      Q1 = va_arg(AP, int);
      H = 0x80 + DUP(EquTab[Q0].Hash, EquTab[Q1].Hash)/4;
      /* fall through into the shared binary lookup */
    MakeBinary:
      Args = 2;
      for (HP = ExpHash[H]; HP != NULL; HP = HP->Tail)
        if (Q0 == HP->Body.Arg[0] && Q1 == HP->Body.Arg[1]) 
          break;
      break;
    default:
      /* Unrecognised tag: no lookup is done; a node with H == 0 and no
       * operands is created below (same as before this was made explicit). */
      break;
    }
  va_end(AP);

  if (HP != NULL) 
    {
      /* Expression already exists: optionally rebind Q to it and reuse it. */
      if (Q != -1 && Q != HP->Class) 
        EquTab[Q].Value = HP;
      return HP->Class;
    }

  /* Not found: build a new node and link it at the head of its bucket. */
  E = (Exp)cl_malloc(sizeof *E);
  E->Tag = Tag;
  if (Tag == SymX) 
    E->Body.Leaf = Sym;
  else 
    {
      E->Body.Arg = (int *) ((Args > 0) ? cl_malloc(Args*sizeof(int)) : NULL);
      if (Args > 0) 
        E->Body.Arg[0] = Q0;
      if (Args > 1) 
        E->Body.Arg[1] = Q1;
    }
  E->Hash = H;
  E->Tail = ExpHash[H];
  ExpHash[H] = E;
  if (Q == -1) 
    {
      /* No equation supplied: append a new one, growing the table if full. */
      if (Equs == EquMax) 
        {
          EquMax += EQU_EXTEND;
          EquTab = (Equation)cl_realloc(EquTab, sizeof *EquTab * EquMax);
        }
      EquTab[Equs].Hash = H;
      EquTab[Equs].Stack = 0;
      Q = Equs++;
    }
  EquTab[Q].Value = E; 
  E->Class = Q; 
  return Q;
}
Esempio n. 15
0
/**
 * Legacy wrapper that simply forwards to cl_realloc().
 *
 * TODO delete: has been replaced throughout with cl_realloc.
 *
 * @param X      pointer handed to cl_realloc() as its first argument
 * @param Bytes  size handed to cl_realloc() as its second argument
 * @return       the pointer returned by cl_realloc()
 */
void *
Reallocate(void *X, unsigned Bytes)
{
  return cl_realloc(X, Bytes);
}