Example #1
0
/**
 * Converts one numeric field of an input line into a corpus position.
 *
 * Uses strtol() so that range errors are actually detected (atoi() never
 * sets errno). Characters following the number are ignored, as atoi() did.
 *
 * @param field  NUL-terminated text of the field.
 * @param cpos   Location for the parsed value; only written on success.
 * @return Boolean; true if the field starts with a non-negative number
 *         that fits into an int, false otherwise.
 */
static int
sencode_parse_cpos(const char *field, int *cpos)
{
  char *stop;
  long value;

  errno = 0;
  value = strtol(field, &stop, 10);
  if (stop == field || errno != 0)
    return 0;                   /* no digits at all, or out of range for a long */
  if (value < 0 || (long)(int)value != value)
    return 0;                   /* negative, or does not fit into an int */

  *cpos = (int)value;
  return 1;
}

/**
 * Parses an input line for cwb-s-encode.
 *
 * Usage:
 *
 * ok = sencode_parse_line(char *line, int *start, int *end, char **annot);
 *
 * Expects standard TAB-separated format; first two fields must be
 * non-negative numbers, optional third field is returned in annot - if
 * not present, annot is set to NULL. The last field must be terminated
 * by a newline character, and no further fields may follow the annotation.
 *
 * The line itself is not modified; parsing works on a temporary copy which
 * is released on every exit path. On success with an annotation, *annot is
 * a newly allocated string (created with cl_strdup()). On failure, *annot
 * is left untouched.
 *
 * @param line   The line to be parsed.
 * @param start  Location for the start cpos.
 * @param end    Location for the end cpos.
 * @param annot  Location for the annotation string.
 * @return Boolean; true for all OK, false for error.
 */
int
sencode_parse_line(char *line, int *start, int *end, char **annot)
{
  char *field, *field_end;
  char *line_copy = cl_strdup(line); /* work on copy to retain original for error messages */
  int has_annotation = 1;
  int ok = 0;

  /* first field: INT range_start (must be followed by a TAB) */
  field = line_copy;
  field_end = strchr(field, '\t');
  if (field_end == NULL)
    goto done;
  *field_end = 0;
  if (!sencode_parse_cpos(field, start))
    goto done;
  field = field_end + 1;

  /* second field: INT range_end (followed by TAB + annotation, or by end of line) */
  field_end = strchr(field, '\t');
  if (field_end == NULL) {
    has_annotation = 0;
    field_end = strchr(field, '\n');
  }
  if (field_end == NULL)
    goto done;
  *field_end = 0;
  if (!sencode_parse_cpos(field, end))
    goto done;
  field = field_end + 1;

  /* optional third field: STRING annotation */
  if (has_annotation) {
    if (strchr(field, '\t') != NULL)
      goto done;                /* make sure there are no extra fields */
    field_end = strchr(field, '\n');
    if (field_end == NULL)
      goto done;
    *field_end = 0;
    *annot = cl_strdup(field);
  }
  else {
    *annot = NULL;
  }

  ok = 1;                       /* OK */

done:
  cl_free(line_copy);           /* single cleanup point: the copy must not leak on errors */
  return ok;
}
Example #2
0
/**
 * Checks the strings of a variable against the lexicon of corpus.attribute.
 *
 * If the variable has already been verified for this corpus / attribute pair,
 * the stored result is returned. Otherwise the stored corpus and attribute
 * names are replaced, every used item is looked up with get_id_of_string(),
 * and the counts of valid / invalid items are updated.
 *
 * @param v          The variable to verify.
 * @param corpus     Corpus whose registry_name is compared and stored.
 * @param attribute  Attribute to look the strings up in; verification is
 *                   only carried out if its type is ATT_POS.
 * @return           The new value of v->valid: 1 if at least one item was
 *                   found in the lexicon, otherwise 0.
 */
int
VerifyVariable(Variable v, 
               Corpus *corpus,
               Attribute *attribute)
{
  int idx;
  int found = 0;
  int missing = 0;

  /* earlier verification against the same corpus and attribute is still good */
  if (v->valid != 0 &&
      v->my_corpus != NULL && v->my_attribute != NULL &&
      strcmp(v->my_corpus, corpus->registry_name) == 0 &&
      strcmp(v->my_attribute, attribute->any.name) == 0)
    return v->valid;

  /* forget the previous verification context */
  v->valid = 0;
  cl_free(v->my_corpus);
  cl_free(v->my_attribute);

  if (attribute->any.type != ATT_POS)
    return 0;

  v->my_corpus = cl_strdup(corpus->registry_name);
  v->my_attribute = cl_strdup(attribute->any.name);

  for (idx = 0; idx < v->nr_items; idx++) {

    if (v->items[idx].free)
      continue;                 /* unused slot */

    if (v->items[idx].sval != NULL)
      v->items[idx].ival = get_id_of_string(attribute, v->items[idx].sval);
    else {
      /* a used slot without a string should never occur */
      fprintf(stderr, "Error #1 in variable logic. Contact developer.\n");
      v->items[idx].ival = -1;
    }

    if (v->items[idx].ival >= 0)
      found++;
    else
      missing++;
  }

  v->nr_valid_items = found;
  v->nr_invalid_items = missing;
  v->valid = (found > 0) ? 1 : 0;

  return v->valid;
}
Example #3
0
/**
 * Adds a string to the item list of a variable, unless VariableItemMember()
 * reports that it is already there.
 *
 * Adding an item resets v->valid to 0. The string is stored as a copy made
 * with cl_strdup(). A free slot is reused if there is one; otherwise the
 * item array grows by ITEM_REALLOC slots and the new slots are marked free.
 *
 * @param v     The variable to add to.
 * @param item  The string to add.
 * @return      Always 1.
 */
int
VariableAddItem(Variable v, char *item)
{
  int slot;

  if (VariableItemMember(v, item))
    return 1;                   /* already a member: nothing to do */

  v->valid = 0;

  /* look for the first free slot */
  slot = 0;
  while (slot < v->nr_items && !v->items[slot].free)
    slot++;

  if (slot < v->nr_items) {
    v->items[slot].sval = cl_strdup(item);
    v->items[slot].ival = -1;
    v->items[slot].free = 0;
    return 1;
  }

  /* no space in list: extend the array (slot now indexes the first new entry) */
  v->nr_items += ITEM_REALLOC;

  if (v->items != NULL)
    v->items = (VariableItem *)cl_realloc(v->items,
                                          sizeof(VariableItem) * v->nr_items);
  else
    v->items = (VariableItem *)cl_malloc(sizeof(VariableItem) * v->nr_items);

  if (v->items == NULL) {
    fprintf(stderr, "Fatal Error #6: no memory left.");
    perror("Memory fault");
    assert(0 && "Big Problem here!");
  }

  /* the new item occupies the first of the fresh slots ... */
  v->items[slot].free = 0;
  v->items[slot].ival = -1;
  v->items[slot].sval = cl_strdup(item);

  /* ... and all remaining ones are initialised as free */
  for (slot = slot + 1; slot < v->nr_items; slot++) {
    v->items[slot].free = 1;
    v->items[slot].ival = -1;
    v->items[slot].sval = NULL;
  }

  return 1;
}
Example #4
0
/**
 * Resets the global "new_satt" record for the s-attribute that is about
 * to be encoded.
 *
 * Name and directory are stored as copies (cl_strdup()); all counters are
 * zeroed, the last corpus position is set to -1, the record is marked as
 * not ready and its fd / avs / avx members are set to NULL.
 *
 * @param name          Name of the s-attribute.
 * @param directory     Directory stored in the record's dir member.
 * @param store_values  Stored unchanged in the record's store_values member.
 */
void
sencode_declare_new_satt(char *name, char *directory, int store_values)
{
  /* nothing has been opened yet */
  new_satt.ready = 0;
  new_satt.fd = NULL;
  new_satt.avs = NULL;
  new_satt.avx = NULL;

  /* identification and settings */
  new_satt.name = cl_strdup(name);
  new_satt.dir = cl_strdup(directory);
  new_satt.store_values = store_values;

  /* encoding state */
  new_satt.num = 0;
  new_satt.offset = 0;
  new_satt.last_cpos = -1;
}
Example #5
0
File: auth.c Project: rforge/rcwb
/**
 * Puts a new user at the front of the global list authorized_users.
 *
 * If find_user() already knows the name, a warning is printed to stderr
 * and the list is left unchanged. Name and password are stored as copies;
 * the new entry starts without any grants.
 *
 * @param user    User name.
 * @param passwd  Password stored for this user.
 */
void 
add_user_to_list(char *user, char *passwd)
{
  UserEntry *entry;

  if (find_user(user) != NULL) {
    fprintf(stderr, "WARNING: user '%s' already in list (ignored)\n", user);
    return;
  }

  entry = (UserEntry *) cl_malloc(sizeof(UserEntry));
  entry->grants = NULL;
  entry->name = cl_strdup(user);
  entry->passwd = cl_strdup(passwd);

  /* link in as new head of the list */
  entry->next = authorized_users;
  authorized_users = entry;
}
Example #6
0
File: auth.c Project: rforge/rcwb
void 
add_grant_to_last_user(char *corpus)
{
  Grant *grant;

  assert(authorized_users);        /* need a 'last user' in list */
  grant = (Grant *) cl_malloc(sizeof(Grant));
  grant->corpus = cl_strdup(corpus);
  grant->next = authorized_users->grants;
  authorized_users->grants = grant;
}
Example #7
0
/**
 * Creates a new, empty variable and registers it in the global VariableSpace.
 *
 * The variable starts out invalid, with no corpus / attribute and no items;
 * its name is a copy of varname. It is placed into the first NULL entry of
 * VariableSpace; if there is none, VariableSpace grows by VARIABLE_REALLOC
 * entries and the unused new entries are set to NULL.
 *
 * @param varname  Name for the variable.
 * @return         The new variable, or NULL if varname is NULL.
 */
Variable
NewVariable(char *varname)
{
  Variable fresh;
  int slot;

  if (varname == NULL)
    return NULL;

  fresh = (Variable)cl_malloc(sizeof(VariableBuffer));
  fresh->my_name = cl_strdup(varname);
  fresh->my_corpus = NULL;
  fresh->my_attribute = NULL;
  fresh->valid = 0;
  fresh->items = NULL;
  fresh->nr_items = 0;

  /* reuse an empty entry if there is one */
  for (slot = 0; slot < nr_variables; slot++) {
    if (VariableSpace[slot] == NULL) {
      VariableSpace[slot] = fresh;
      return fresh;
    }
  }

  /* not inserted: extend the table (slot now indexes the first new entry) */
  nr_variables += VARIABLE_REALLOC;

  if (VariableSpace != NULL)
    VariableSpace = (Variable *)cl_realloc(VariableSpace,
                                           nr_variables * sizeof(Variable));
  else
    VariableSpace = (Variable *)cl_malloc(nr_variables * sizeof(Variable));

  if (VariableSpace == NULL) {
    fprintf(stderr, "Fatal Error: Variable space out of memory.\n");
    assert(0 && "Sorry, big problem here!");
  }

  VariableSpace[slot] = fresh;

  /* the rest of the new entries are empty */
  for (slot = slot + 1; slot < nr_variables; slot++)
    VariableSpace[slot] = NULL;

  return fresh;
}
Example #8
0
File: regopt.c Project: cran/rcqp
/**
 * Internal regopt function: transfers the optimiser results held in the
 * cl_regopt_* global variables into the members of a CL_Regex object.
 *
 * The grain strings are duplicated with cl_strdup(); the jump table
 * (256 entries) and the scalar settings are copied by value. If cl_debug
 * is set, the number of grains is reported.
 *
 * @param rx  The regex object to fill.
 */
void
regopt_data_copy_to_regex_object(CL_Regex rx)
{
  int k;

  /* scalar settings */
  rx->anchor_start = cl_regopt_anchor_start;
  rx->anchor_end = cl_regopt_anchor_end;
  rx->grain_len = cl_regopt_grain_len;
  rx->grains = cl_regopt_grains;

  /* private copies of the grain strings */
  k = 0;
  while (k < rx->grains) {
    rx->grain[k] = cl_strdup(cl_regopt_grain[k]);
    k++;
  }

  /* jump table */
  for (k = 0; k < 256; k++) {
    rx->jumptable[k] = cl_regopt_jumptable[k];
  }

  if (cl_debug) {
    Rprintf( "CL: using %d grain(s) for optimised regex matching\n", rx->grains);
  }
}
Example #9
0
File: output.c Project: rforge/rcwb
/**
 * Opens a pipe to a freshly started output pager.
 *
 * A command that differs from the one remembered in the global variable
 * "tested_pager" is tested first: it is started once and closed again, and
 * is only accepted (and remembered) if both steps succeed.
 *
 * If the global "less_charset_variable" is a non-empty string, the
 * environment variable of that name is set to "utf-8" for ASCII / UTF-8
 * corpora and to "iso8859" for everything else, unless it already has
 * that value.
 *
 * The command would normally be something like "more" or "less".
 *
 * @see            tested_pager
 * @see            less_charset_variable
 * @param cmd      Program command to start the pager process.
 * @param charset  Charset that determines the pager charset environment variable.
 * @return         Writable stream for the pipe to the pager, or NULL if the
 *                 test of the pager program failed or popen() failed.
 */
FILE *
open_pager(char *cmd, CorpusCharset charset)
{
  int untested = (tested_pager == NULL) || (strcmp(tested_pager, cmd) != 0);

  if (untested) {
    /* this is a new pager, so do a trial run */
    FILE *probe = popen(cmd, "w");

    if (probe == NULL)
      return NULL;              /* new pager cmd doesn't work -> return error */
    if (pclose(probe) != 0)
      return NULL;

    if (tested_pager != NULL)
      cl_free(tested_pager);
    tested_pager = cl_strdup(cmd);
  }

  /* only touch the environment if a variable name has been configured */
  if (*less_charset_variable) {
    char *wanted;
    char *present;

    /* ASCII is a subset of valid UTF-8. "less" does not distinguish between
     * the different ISO-8859 character sets, so anything else is ISO-8859. */
    if (charset == ascii || charset == utf8)
      wanted = "utf-8";
    else
      wanted = "iso8859";

    present = getenv(less_charset_variable);

    /* call setenv() if variable is not set or different from desired value */
    if (present == NULL || strcmp(present, wanted) != 0)
      setenv(less_charset_variable, wanted, 1);
  }

  /* NULL if popen() failed for some reason */
  return popen(cmd, "w");
}
Example #10
0
/**
 * Looks up the symbol named S in the global hash table, creating it if
 * it does not exist yet.
 *
 * A new symbol stores a copy of S, is put at the front of its hash chain
 * and is appended (via the Tail links) to the sequence that starts at
 * FirstB and ends at LastB.
 *
 * @param S  Name of the symbol.
 * @return   The existing or newly created symbol.
 */
Symbol
LookUp(char *S)
{
  byte bucket = Hash(S);
  Symbol entry = HashTab[bucket];

  /* walk the hash chain */
  while (entry != 0) {
    if (strcmp(entry->Name, S) == 0)
      return entry;
    entry = entry->Next;
  }

  /* not found: create the symbol */
  entry = (Symbol)cl_malloc(sizeof *entry);
  entry->Name = cl_strdup(S);
  entry->Hash = bucket;
  entry->Tail = 0;

  /* new head of the hash chain */
  entry->Next = HashTab[bucket];
  HashTab[bucket] = entry;

  /* append to the FirstB .. LastB sequence */
  if (FirstB != 0)
    LastB->Tail = entry;
  else
    FirstB = entry;
  LastB = entry;

  return entry;
}
Example #11
0
/*
 * Builds a management message from argv[1] .. argv[argc-1], passes it to
 * process_msg() and returns a cl_strdup() copy of the reply.
 *
 * argv[1] is used unconditionally, so the caller has to supply it.
 * Returns NULL if process_msg() returns NULL.
 */
static char*  
process_command(int argc, char * argv[])
{
	char *request;
	char *reply;
	char *copy;
	int arg;

	/* argv[1] starts the message, the remaining arguments are appended */
	request = mgmt_new_msg(argv[1], NULL);
	arg = 2;
	while (arg < argc) {
		request = mgmt_msg_append(request, argv[arg]);
		arg++;
	}

	cl_log(LOG_DEBUG, "msg sent: %s", request);
	reply = process_msg(request);
	mgmt_del_msg(request);

	if (reply != NULL) {
		/* hand out a private copy and release the reply message */
		copy = cl_strdup(reply);
		mgmt_del_msg(reply);
		return copy;
	}

	return NULL;
}
Example #12
0
/**
 * insert region [start, end, annot] after SL_Point; no overlap/position checking
 */
SL
SL_insert_after_point(int start, int end, char *annot)
{
  /* allocate and initialise new item to insert into list */
  SL item = (SL) cl_malloc(sizeof(struct _SL));
  item->start = start;
  item->end = end;
  if (annot != NULL)
    item->annot = cl_strdup(annot);
  else
    item->annot = NULL;
  item->prev = NULL;
  item->next = NULL;

  /* this function has to handle a number of special cases ... */
  if (SL_Point == NULL) {          /* insert at start of list */
    if (StructureList == NULL) {   /* empty list */
      SL_Point = StructureList = item;
    }
    else {
      item->next = StructureList;
      StructureList->prev = item;
      SL_Point = StructureList = item;
    }
  }
  else if (SL_Point->next == NULL) { /* insert at end of list */
    item->prev = SL_Point;
    SL_Point = SL_Point->next = item;
  }
  else {                         /* insert somewhere inside list */
    item->next = SL_Point->next; /* links between new item and following item */
    SL_Point->next->prev = item;
    SL_Point->next = item;       /* links between point and new item */
    item->prev = SL_Point;
    SL_Point = item;
  }
  return SL_Point;
}
Example #13
0
/**
 * This is the main function to update resource table v2.
 *
 * For every resource in reslist and every node in nodelist one
 * hb_rsinfov2 record is built. Records are added to gResourceTableV2
 * unless the resource is stopped on that node and has a fail-count of 0.
 * The function then recurses into the children of each resource.
 *
 * index is the running entry counter: it is assigned to each record that
 * is added and incremented afterwards. An empty (NULL) reslist returns
 * index unchanged; a failed allocation returns HA_FAIL.
 *
 * NOTE(review): the end of this function (presumably the final return of
 * the resource count) is not part of this excerpt.
 */
static int
update_resources_recursively(GListPtr reslist, GListPtr nodelist, int index)
{

    if (reslist == NULL) {
        return index;
    }
    /*
     * Set resource info to resource table v2 from data_set,
     * and add it to Glib's array.
     */
    slist_iter(rsc, resource_t, reslist, lpc1,
    {
        cl_log(LOG_DEBUG, "resource %s processing.", rsc->id);
        slist_iter(node, node_t, nodelist, lpc2,
        {
            struct hb_rsinfov2 *rsinfo;
            enum rsc_role_e rsstate;

            rsinfo = (struct hb_rsinfov2 *) cl_malloc(sizeof(struct hb_rsinfov2));
            if (!rsinfo) {
                cl_log(LOG_CRIT, "malloc resource info v2 failed.");
                return HA_FAIL;
            }

            rsinfo->resourceid = cl_strdup(rsc->id);
            rsinfo->type = PE_OBJ_TYPES2AGENTTYPE(rsc->variant);

            /* using a temp var to suppress casting warning of the compiler */
            rsstate = rsc->fns->state(rsc, TRUE);
            {
                GListPtr running_on_nodes = NULL;

                rsc->fns->location(rsc, &running_on_nodes, TRUE);
                if (pe_find_node_id(
                    running_on_nodes, node->details->id) == NULL) {
                    /*
                     * if the resource is not running on current node,
                     * its status is "stopped(1)".
                     */
                    rsstate = RSC_ROLE_STOPPED;
                }
               g_list_free(running_on_nodes);
            }
            rsinfo->status = RSC_ROLE_E2AGENTSTATUS(rsstate);
            rsinfo->node = cl_strdup(node->details->uname);

            if (is_not_set(rsc->flags, pe_rsc_managed)) {
                rsinfo->is_managed = LHARESOURCEISMANAGED_UNMANAGED;
            } else {
                rsinfo->is_managed = LHARESOURCEISMANAGED_MANAGED;
            }

            /*
             * get fail-count from <status>: the node attribute is looked up
             * under the name "fail-count" + '-' + resource id; "0" is passed
             * to crm_parse_int() as its second argument.
             */
            {
                char *attr_name = NULL;
                char *attr_value = NULL;
                crm_data_t *tmp_xml = NULL;

                attr_name = crm_concat("fail-count", rsinfo->resourceid, '-');
                attr_value = g_hash_table_lookup(node->details->attrs,
                    attr_name); 
                rsinfo->failcount = crm_parse_int(attr_value, "0");
                crm_free(attr_name);
                /* tmp_xml is never assigned, so this is called with NULL */
                free_xml(tmp_xml);
             }

            /* resources without a parent get an empty parent string */
            if (rsc->parent != NULL) {
                rsinfo->parent = cl_strdup(rsc->parent->id);
            } else {
                rsinfo->parent = cl_strdup("");
            }

            /*
             * if the resource stops, and its fail-count is 0,
             * don't list it up.
             */ 
            if (rsinfo->status != LHARESOURCESTATUS_STOPPED || 
                   rsinfo->failcount > 0) {
                rsinfo->index = index++;
                g_ptr_array_add(gResourceTableV2, (gpointer *)rsinfo);
            } else {
                /* record is not listed: release it together with its strings */
                cl_free(rsinfo->resourceid);
                cl_free(rsinfo->node);
                cl_free(rsinfo->parent);
                cl_free(rsinfo);
            }

        }); /* end slist_iter(node) */

        /* add resources recursively for group/clone/master */
        index = update_resources_recursively(rsc->fns->children(rsc),
            nodelist, index);

    }); /* end slist_iter(rsc) */
Example #14
0
File: output.c Project: rforge/rcwb
/**
 * Tabulates a query result according to the global list of tabulation items.
 *
 * First, an attribute handle is obtained for every tabulation item (a
 * p-attribute is tried first, then an s-attribute, which must have annotated
 * values) and, unless the query result is empty, the anchors of the item are
 * validated. Then the output stream is opened and one line is printed per
 * match: tabulation items are separated by TABs, multiple values within an
 * item by blanks. Items without an attribute name print the corpus position;
 * an undefined anchor yields a single undefined value for the whole range.
 *
 * On success, the stream is closed and the tabulation list is freed.
 *
 * @param cl     The query result (named query) to tabulate.
 * @param first  First match to print (clipped to 0).
 * @param last   Last match to print (clipped to the last match of cl).
 * @param rd     Output redirection to print to.
 * @return       1 if tabulation was successful, otherwise 0 (an error
 *               message is generated for attribute and redirection problems).
 */
int
print_tabulation(CorpusList *cl, int first, int last, struct Redir *rd)
{
  TabulationItem spec;
  int match;

  if (cl == NULL)
    return 0;

  /* make sure that first and last match to tabulate are in range */
  if (first <= 0)
    first = 0;
  if (last >= cl->size)
    last = cl->size - 1;

  /* pass 1: obtain attribute handles for tabulation items */
  for (spec = TabulationList; spec; spec = spec->next) {
    if (!spec->attribute_name) {
      /* no attribute -> print corpus position */
      spec->attribute_type = ATT_NONE;
    }
    else {
      spec->attribute = cl_new_attribute(cl->corpus, spec->attribute_name, ATT_POS);
      if (spec->attribute != NULL) {
        spec->attribute_type = ATT_POS;
      }
      else {
        spec->attribute = cl_new_attribute(cl->corpus, spec->attribute_name, ATT_STRUC);
        if (spec->attribute == NULL) {
          cqpmessage(Error, "Can't find attribute ``%s'' for named query %s", spec->attribute_name, cl->name);
          return 0;
        }
        spec->attribute_type = ATT_STRUC;
        if (!cl_struc_values(spec->attribute)) {
          cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s", spec->attribute_name, cl->name);
          return 0;
        }
      }
    }

    /* work around bug: anchor validation will fail for empty query result
       (but then the printing loop below is void anyway) */
    if (cl->size > 0) {
      if (!pt_validate_anchor(cl, spec->anchor1) || !pt_validate_anchor(cl, spec->anchor2))
        return 0;
    }
  }

  if (!open_stream(rd, cl->corpus->charset)) {
    cqpmessage(Error, "Can't redirect output to file or pipe\n");
    return 0;
  }

  /* pass 2: tabulate selected attribute values for matches <first> .. <last> */
  for (match = first; match <= last; match++) {
    for (spec = TabulationList; spec; spec = spec->next) {
      int from = pt_get_anchor_cpos(cl, match, spec->anchor1, spec->offset1);
      int to   = pt_get_anchor_cpos(cl, match, spec->anchor2, spec->offset2);
      int pos;

      /* one of the anchors is undefined -> print single undefined value for entire range */
      if (from < 0 || to < 0) {
        from = -1;
        to = -1;
      }

      for (pos = from; pos <= to; pos++) {
        if (spec->attribute_type == ATT_NONE) {
          fprintf(rd->stream, "%d", pos);
        }
        else if (pos >= 0) {
          /* undefined anchors print empty string */
          char *value = NULL;

          if (spec->attribute_type == ATT_POS)
            value = cl_cpos2str(spec->attribute, pos);
          else
            value = cl_cpos2struc2str(spec->attribute, pos);

          if (value) {
            if (!spec->flags) {
              fprintf(rd->stream, "%s", value);
            }
            else {
              /* apply the item's flags to a private copy of the string */
              char *folded = cl_strdup(value);
              cl_string_canonical(folded, cl->corpus->charset, spec->flags);
              fprintf(rd->stream, "%s", folded);
              cl_free(folded);
            }
          }
        }

        /* multiple values for tabulation item are separated by blanks */
        if (pos < to)
          fprintf(rd->stream, " ");
      }

      /* multiple tabulation items are separated by TABs */
      if (spec->next)
        fprintf(rd->stream, "\t");
    }
    fprintf(rd->stream, "\n");
  }

  close_stream(rd);
  free_tabulation_list();
  return 1;
}
Example #15
0
File: output.c Project: rforge/rcwb
/**
 * Open the stream within a Redir structure.
 *
 * If rd->name is set, output goes either to a pipe (the name, after
 * optional leading blanks, starts with '|' followed by a command) or to
 * a file. Pipes are refused in insecure mode.
 *
 * If rd->name is NULL, the only stream that can be opened is a pipe to
 * the output pager; this is attempted if a pager is configured, paging
 * is enabled and standard output is a terminal. Should the configured
 * pager fail to start, CQP_FALLBACK_PAGER is tried and, on success,
 * becomes the value of the "Pager" option; otherwise the "Paging" option
 * is switched off. In all other cases rd->stream is set to NULL.
 *
 * rd->is_pipe and rd->is_paging are always set to match the outcome.
 *
 * @param rd       Redir structure to be opened.
 * @param charset  The charset to be used. Only has an effect if the stream
 *                 to be opened is to an output pager.
 * @return         True for success, false for failure (i.e. whenever
 *                 rd->stream ends up as NULL).
 */
int
open_stream(struct Redir *rd, CorpusCharset charset)
{
  int i;

  assert(rd);

  if (rd->name) {
    /* skip leading blanks to find out whether this is a pipe */
    i = 0;
    while (rd->name[i] == ' ')
      i++;
    
    if ((rd->name[i] == '|') &&
        (rd->name[i+1] != '\0')) {
      
      if (insecure) {
        /* set stream to NULL to force return value of 0 */
        rd->stream = NULL;
        rd->is_pipe = False;
        rd->is_paging = False;
      }
      else {
        
        /* we send the output to a pipe */
        rd->is_pipe = True;
        rd->is_paging = False;
        rd->stream = popen(rd->name+i+1, rd->mode);
      }
    }
    else {

      /* normal output to file */
      rd->is_pipe = False;
      rd->is_paging = False;
      rd->stream = open_file(rd->name, rd->mode);
    }
  }
  else { /* i.e. if rd->name is NULL */
    /* Test the standard output descriptor directly: calling fileno() on a
     * NULL stream is undefined behaviour and crashes as soon as paging is
     * enabled. */
    if (pager && paging && isatty(STDOUT_FILENO)) {
      if (insecure) {
        cqpmessage(Error, "Insecure mode, paging not allowed.\n");
        /* no stream is opened, so the function reports failure */
        rd->stream = NULL;
        rd->is_paging = False;
        rd->is_pipe = False;
      }
      else if ((rd->stream = open_pager(pager, charset)) == NULL) {
        cqpmessage(Warning, "Could not start pager '%s', trying fallback '%s'.\n", pager, CQP_FALLBACK_PAGER);
        if ((rd->stream = open_pager(CQP_FALLBACK_PAGER, charset)) == NULL) {
          cqpmessage(Warning, "Could not start fallback pager '%s'. Paging disabled.\n", CQP_FALLBACK_PAGER);
          set_integer_option_value("Paging", 0);
          rd->is_pipe = False;
          rd->is_paging = False;
          rd->stream = NULL;
        }
        else {
          /* the fallback works: use it from now on */
          rd->is_pipe = 1;
          rd->is_paging = True;
          set_string_option_value("Pager", cl_strdup(CQP_FALLBACK_PAGER));
        }
      }
      else {
        rd->is_pipe = 1;
        rd->is_paging = True;
      }
    }
    else {
      /* no pager wanted or possible: nothing to open */
      rd->stream = NULL;
      rd->is_paging = False;
      rd->is_pipe = False;
    }
  }
  return (rd->stream == NULL ? 0 : 1);
}
Example #16
0
/**
 * Adds a new AttributeInfo to an AttributeList object.
 *
 * The new entry stores a copy of the name, has no attribute handle yet,
 * and the list is marked as not valid (list_valid = 0) afterwards.
 *
 * @param list            The list to add to.
 * @param name            The name of the Attribute that this AttributeInfo refers to.
 * @param initial_status  Initial setting for the status member of the new AttributeInfo.
 * @param position        If this is 1, the new AttributeInfo is added at the beginning
 *                        of the list. If it is 0, it is added at the end of the list.
 *                        Otherwise, it becomes the position-th element of the list
 *                        (counting from 1); if the list is too short for that, it is
 *                        appended at the end. Values below 0 insert directly after
 *                        the first element.
 * @return                A pointer to the new AttributeInfo, or NULL if the
 *                        name is already a member of the list.
 */
AttributeInfo
*AddNameToAL(AttributeList *list,
             char *name,
             int initial_status,
             int position)
{
  AttributeInfo *ai;
  AttributeInfo *prev;

  if (MemberAL(list, name))
    return NULL;

  ai = (AttributeInfo *)cl_malloc(sizeof(AttributeInfo));

  ai->status = initial_status;
  ai->name = cl_strdup(name);
  ai->attribute = NULL;
  ai->next = NULL;
  ai->prev = NULL;

  if (list->list == NULL) {
    /* empty list: the new element is the only one */
    list->list = ai;
  }
  else if (position == 1) {
    /* insertion at beginning; the old head must point back to the new one,
       otherwise the prev chain of the list is broken */
    ai->next = list->list;
    list->list->prev = ai;
    list->list = ai;
  }
  else {
    /* find the element after which the new one is inserted */
    prev = list->list;

    if (position == 0) {
      /* insert new element at end of list */
      while (prev->next)
        prev = prev->next;
    }
    else {
      /* insert new element at certain position */
      while (prev->next && position > 2) {
        prev = prev->next;
        position--;
      }
    }

    ai->prev = prev;
    ai->next = prev->next;

    /* prev may be the last element, in which case there is no successor
       whose back link could be updated */
    if (ai->next != NULL)
      ai->next->prev = ai;
    prev->next = ai;
  }

  list->list_valid = 0;

  /* return the new element */
  return ai;
}
Example #17
0
/**
 * Reads the terminal escape sequences for the display attributes used in
 * highlighted output (standout, underline, bold, blink, all-off) from the
 * terminfo database and stores them in the sc_* global variables.
 *
 * All sequences are first reset to the empty string, so they can be printed
 * safely even if TERM is not set or the terminal cannot be set up; in that
 * case the function returns early without marking the escapes as initialised.
 *
 * Missing capabilities fall back as follows: underline, bold and blink use
 * standout mode; bold and blink are switched off with the "all attributes
 * off" sequence because they cannot be turned off explicitly.
 *
 * On success, escapes_initialized is incremented and the AfterLine format of
 * ASCIIHighlightedPrintDescriptionRecord is built; if use_colour is set, its
 * CPOSPrintFormat, BeforePrintStructures and AfterPrintStructures formats
 * are built as well.
 */
void
get_screen_escapes(void)
{
  int status, l;
  char *term;

  sc_s_in = "";
  sc_s_out = "";
  sc_u_in = "";
  sc_u_out = "";
  sc_b_in = "";
  sc_b_out = "";
  sc_bl_in = "";
  sc_bl_out = "";
  sc_all_out = "";              /* also reset this one, for the early returns below */

  if ((term = getenv("TERM")) == NULL)
    return;

  if ((setupterm(term, 1, &status) == ERR) || (status != 1)) {
    return;
  }  

  /* turn off all attributes */
  sc_all_out = tigetstr("sgr0");
  if (sc_all_out == NULL) sc_all_out = "";

  /* Linux terminfo bug? fix: tigetstr("sgr0") returns an extra ^O (\x0f) character appended to the escape sequence
     (this may be some code used internally by the ncurses library).
     Since we print the escape sequences directly, we have to remove the extra character or 'less -R' will get confused. */
  l = strlen(sc_all_out);
  if ((l > 0) && (sc_all_out[l-1] == '\x0f')) {
    sc_all_out = cl_strdup(sc_all_out); /* work on a copy, not on the string owned by terminfo */
    sc_all_out[l-1] = 0;        /* just chop off the offending character */
  }


  /* standout mode */
  sc_s_in = tigetstr("smso");
  if (sc_s_in == NULL) sc_s_in = "";
  sc_s_out = tigetstr("rmso");
  if (sc_s_out == NULL) sc_s_out = "";

  /* underline */
  sc_u_in = tigetstr("smul");
  if (sc_u_in == NULL) sc_u_in = sc_s_in;
  sc_u_out = tigetstr("rmul");
  if (sc_u_out == NULL) sc_u_out = sc_s_out;
  
  /* bold */
  sc_b_in = tigetstr("bold");
  if (sc_b_in == NULL) {
    sc_b_in = sc_s_in;
    sc_b_out = sc_s_out;
  }
  else {
    /* can't turn off bold explicitly; use the cleaned-up "sgr0" sequence
       rather than the raw one, which may still carry the stray ^O */
    sc_b_out = sc_all_out;
  }

  /* blink */
  sc_bl_in = tigetstr("blink");
  if (sc_bl_in == NULL) {
    sc_bl_in = sc_s_in;
    sc_bl_out = sc_s_out;
  }
  else {
    sc_bl_out = sc_all_out;      /* can't turn off blinking mode explicitly */
  }

  escapes_initialized++;
  
  /* in highlighted mode, switch off display attributes at end of line (to be on the safe side) */
  ASCIIHighlightedPrintDescriptionRecord.AfterLine = cl_malloc(strlen(sc_all_out) + 2);
  sprintf(ASCIIHighlightedPrintDescriptionRecord.AfterLine,
          "%s\n", sc_all_out);

  /* print cpos in blue, "print structures" in pink if we're in coloured mode */
  if (use_colour) {
    char *blue = get_colour_escape('b', 1);
    char *pink = get_colour_escape('p', 1);
    char *normal = get_typeface_escape('n');
    char *bold = get_typeface_escape('b');

    /* the '%' is passed as a character so that the result is itself a
       format string containing "%9d" */
    ASCIIHighlightedPrintDescriptionRecord.CPOSPrintFormat = cl_malloc(strlen(blue) + strlen(normal) + 8);
    sprintf(ASCIIHighlightedPrintDescriptionRecord.CPOSPrintFormat,
            "%s%c9d:%s ", blue, '%', normal);
    ASCIIHighlightedPrintDescriptionRecord.BeforePrintStructures = cl_malloc(strlen(pink) + strlen(bold) + 4);
    sprintf(ASCIIHighlightedPrintDescriptionRecord.BeforePrintStructures,
            "%s%s", pink, bold);
    ASCIIHighlightedPrintDescriptionRecord.AfterPrintStructures = cl_malloc(strlen(normal) + 6);
    sprintf(ASCIIHighlightedPrintDescriptionRecord.AfterPrintStructures,
            ":%s ", normal);
  }
}
Example #18
0
/**
 * Sets a context option (left context, right context, or both).
 *
 * The unit of the context is derived from sval: NULL or one of "character",
 * "char", "chars", "characters" selects character context; "word" or "words"
 * selects word context (all compared case-insensitively); anything else is
 * taken as the name of a structure, which is stored as a copy. The width of
 * the context is ival.
 *
 * Which side is changed depends on the option name: "LeftContext" / "lc",
 * "RightContext" / "rc", or "Context" / "c" for both sides (also compared
 * case-insensitively). Afterwards the side effects of the option are executed.
 *
 * @param opt_name  The name of the option to set.
 * @param sval      String value.
 * @param ival      Integer value.
 * @return          NULL if all OK; otherwise a string describing the problem.
 */
char *
set_context_option_value(char *opt_name, char *sval, int ival)
{
  int idx;
  int kind;
  int do_left, do_right;

  idx = find_option(opt_name);

  if (idx < 0)
    return "No such option";
  if (cqpoptions[idx].type != OptContext)
    return "Illegal value for this option";

  /* which unit is the context measured in? */
  if (sval == NULL ||
      strcasecmp(sval, "character") == 0 ||
      strcasecmp(sval, "char") == 0 ||
      strcasecmp(sval, "chars") == 0 ||
      strcasecmp(sval, "characters") == 0) {
    kind = CHAR_CONTEXT;
  }
  else if (strcasecmp(sval, "word") == 0 ||
           strcasecmp(sval, "words") == 0) {
    kind = WORD_CONTEXT;
  }
  else {
    kind = STRUC_CONTEXT;
  }

  /* which side(s) does the option refer to? */
  if (strcasecmp(opt_name, "LeftContext") == 0 ||
      strcasecmp(opt_name, "lc") == 0) {
    do_left = 1;
    do_right = 0;
  }
  else if (strcasecmp(opt_name, "RightContext") == 0 ||
           strcasecmp(opt_name, "rc") == 0) {
    do_left = 0;
    do_right = 1;
  }
  else if (strcasecmp(opt_name, "Context") == 0 ||
           strcasecmp(opt_name, "c") == 0) {
    do_left = 1;
    do_right = 1;
  }
  else {
    return "Illegal value for this option/??";
  }

  if (do_left) {
    CD.left_structure = NULL;
    CD.left_type = kind;
    CD.left_width = ival;
    cl_free(CD.left_structure_name);
    if (kind == STRUC_CONTEXT)
      CD.left_structure_name = cl_strdup(sval);
  }

  if (do_right) {
    CD.right_structure = NULL;
    CD.right_type = kind;
    CD.right_width = ival;
    cl_free(CD.right_structure_name);
    if (kind == STRUC_CONTEXT)
      CD.right_structure_name = cl_strdup(sval);
  }

  execute_side_effects(idx);

  return NULL;
}
Example #19
0
/**
 * Parses program options and sets their default values.
 *
 * @param ac  The program's argc.
 * @param av  The program's argv.
 */
void
parse_options(int ac, char *av[])
{
  extern char *optarg;
  /* optind and opterr unused, so don't declare them to keep gcc from complaining */
  /*   extern int   optind; */
  /*   extern int   opterr; */

  int c;
  int opt;
  char *valid_options = "";        /* set these depending on application */

  insecure = 0;

  progname = av[0];
  licensee =
    "\n"
    "The IMS Open Corpus Workbench (CWB)\n"
    "\n"
    "Copyright (C) 1993-2006 by IMS, University of Stuttgart\n"
    "Original developer:       Oliver Christ\n"
    "    with contributions by Bruno Maximilian Schulze\n"
    "Version 3.0 developed by: Stefan Evert\n"
    "    with contributions by Arne Fitschen\n"
    "\n"
    "Copyright (C) 2007-today by the CWB open-source community\n"
    "    individual contributors are listed in source file AUTHORS\n"
    "\n"
    "Download and contact: http://cwb.sourceforge.net/\n"
#ifdef COMPILE_DATE
    "\nCompiled:  " COMPILE_DATE
#endif
#ifdef VERSION
    "\nVersion:   " VERSION
#endif
    "\n";
  
  set_default_option_values();
  switch (which_app) {
  case cqp: 
    valid_options = "+b:cCd:D:ef:FhiI:l:L:mM:pP:r:R:sSvW:x";
    break;
  case cqpcl:
    valid_options = "+b:cd:D:E:FhiI:l:L:mM:r:R:sSvW:x";
    break;
  case cqpserver:
    valid_options = "+1b:d:D:FhI:l:LmM:P:qr:Svx";
    break;
  default:
    cqp_usage();
    /* this will display the 'unknown application' message */
  }

  while ((c = getopt(ac, av, valid_options)) != EOF)
    switch (c) {

    case '1':
      private_server = 1;
      break;

    case 'q':
      server_quit = 1;
      break;

    case 'x':
      insecure++;
      break;

    case 'C':
      use_colour++;
      break;

    case 'p':
      paging = 0;
      break;

    case 'D':
      default_corpus = cl_strdup(optarg);
      break;

    case 'E':
      if ((query_string = getenv(optarg)) == NULL) {
        fprintf(stderr, "Environment variable %s has no value, exiting\n", optarg);
        exit(1);
      }
      break;

    case 'r':
      registry = cl_strdup(optarg);
      break;

    case 'l':
      LOCAL_CORP_PATH = cl_strdup(optarg);
      break;

    case 'F':
      inhibit_activation++;
      break;

    case 'I':
      cqp_init_file = optarg;
      break;

    case 'm':
      enable_macros = 0;        /* -m = DISABLE macros */

    case 'M':
      macro_init_file = optarg;
      break;

    case 'P':
      if (which_app == cqpserver)        /* this option used in different ways by cqp & cqpserver */
        server_port = atoi(optarg);
      else
        pager = cl_strdup(optarg);
      break;

    case 'd':
      if (!silent) {

        opt = find_option(optarg);

        if ((opt >= 0) && (cqpoptions[opt].type == OptBoolean)) {
          /* TOGGLE the default value */
          *((int *)cqpoptions[opt].address) = cqpoptions[opt].idefault ? 0 : 1;
          execute_side_effects(opt);
        }
        else if (strcmp(optarg, "ALL") == 0) {
          /* set the debug values */
          verbose_parser = show_symtab = show_gconstraints =
            show_evaltree = show_patlist = show_dfa = show_compdfa =
            symtab_debug = parser_debug = eval_debug =
            initial_matchlist_debug = debug_simulation =
            search_debug = macro_debug = activate_cl_debug =
            server_debug = server_log = snoop = True;
          /* execute side effect for CLDebug option */
          cl_set_debug_level(activate_cl_debug);
        }
        else {
          fprintf(stderr, "Invalid debug mode: -d %s\nType '%s -h' for more information.\n",
                  optarg, progname);
          exit(1);
        }
      }
      break;
    case 'h':
      cqp_usage();
      break;
    case 'v':
      printf("%s\n", licensee);
      exit(0);
      break;
    case 's':
      subquery = 1;
      break;
    case 'S':
      if (handle_sigpipe)
        handle_sigpipe = 0;
      else 
        handle_sigpipe++;
      break;

    case 'W':
      CD.left_width = CD.right_width = atoi(optarg);
      execute_side_effects(3);
      break;

    case 'L':
      if (which_app == cqpserver)        /* used in different ways by cqpserver & cqp/cqpcl */
        localhost++;                        /* takes no arg with cqpserver */
      else 
        CD.left_width = atoi(optarg);
      break;

    case 'R':
      CD.right_width = atoi(optarg);
      break;

    case 'b':
      hard_boundary = atoi(optarg);
      break;

    case 'i':
      silent = rangeoutput = True;
      verbose_parser = show_symtab
        = show_gconstraints = show_evaltree
        = show_patlist = symtab_debug
        = parser_debug = eval_debug
        = search_debug = False;
      /* cf. options.h; there are more debug vars than this now, should they all be set to false? */
      break;
      
    case 'c':
      silent = child_process = True;
      paging = highlighting = False;
      autoshow = auto_save = False;
      progress_bar_child_mode(1);
      /* TODO: would it be useful to set PrettyPrint automatically to "off" in child mode? */
      break;

    case 'e':
      use_readline = True;
      break;

    case 'f':
      silent = batchmode = True;
      verbose_parser = show_symtab = show_gconstraints = 
        show_dfa = show_compdfa =
        show_evaltree = show_patlist =
        symtab_debug = parser_debug = eval_debug = search_debug = False;
      if (strcmp(optarg, "-") == 0) 
        batchfd = stdin;
      else if ((batchfd = open_file(optarg, "r")) == NULL) {
        perror(optarg);
        exit(1);
      }
      break;
    default:

      fprintf(stderr, "Invalid option. Type '%s -h' for more information.\n",
              progname);
      exit(1);
      break;
    }
}
Example #20
0
/**
 * Resets every CQP option to its default value.
 *
 * For each entry of the cqpoptions table that has a storage address, the
 * value of the associated environment variable (if one is named and set)
 * takes precedence over the built-in default. Afterwards the global state
 * not covered by the table is reset and the side effects of the CLDebug
 * and Optimize defaults are applied.
 */
void
set_default_option_values(void)
{
  int i;
  char *env;

  for (i = 0; cqpoptions[i].opt_name != NULL; i++) {

    /* options without a storage location have nothing to reset */
    if (!cqpoptions[i].address)
      continue;

    if (cqpoptions[i].type == OptString) {
      char **str_slot = (char **)cqpoptions[i].address;

      *str_slot = NULL;

      /* an environment variable overrides the internal default */
      if (cqpoptions[i].envvar != NULL) {
        env = getenv(cqpoptions[i].envvar);
        if (env != NULL)
          *str_slot = cl_strdup(env);
      }

      /* no value from the environment: copy the internal default, if any */
      if (*str_slot == NULL && cqpoptions[i].cdefault)
        *str_slot = cl_strdup(cqpoptions[i].cdefault);
    }
    else if (cqpoptions[i].type == OptInteger || cqpoptions[i].type == OptBoolean) {
      int *int_slot = (int *)cqpoptions[i].address;

      env = (cqpoptions[i].envvar != NULL) ? getenv(cqpoptions[i].envvar) : NULL;
      *int_slot = (env != NULL) ? atoi(env) : cqpoptions[i].idefault;
    }
    /* all other option types are left untouched */
  }

  /* global state that is not part of the option table */
  query_string = NULL;
  cqp_init_file = NULL;
  macro_init_file = NULL;
  inhibit_activation = 0;
  handle_sigpipe = 1;

  /* default context: DEFAULT_CONTEXT characters on either side */
  initialize_context_descriptor(&CD);
  CD.left_type  = CHAR_CONTEXT;
  CD.left_width = DEFAULT_CONTEXT;
  CD.right_type  = CHAR_CONTEXT;
  CD.right_width = DEFAULT_CONTEXT;

  CD.print_cpos = 1;

  /* TODO: should the following be scrubbed at some point? */
  ExternalSortingCommand = cl_strdup(DEFAULT_EXTERNAL_SORTING_COMMAND);
  ExternalGroupingCommand = cl_strdup(DEFAULT_EXTERNAL_GROUPING_COMMAND);

  /* CQPserver options */
  private_server = 0;
  server_port = 0;
  server_quit = 0;
  localhost = 0;

  /* the matching strategy is not derived automatically from the table defaults */
  matching_strategy = standard_match;

  /* set to the PAGER command later on, once that has been run successfully */
  tested_pager = NULL;

  /* apply the side effects of the default values */
  cl_set_debug_level(activate_cl_debug);
  cl_set_optimize(query_optimize);
}
Example #21
0
/**
 * Carries out any "side effects" of setting an option.
 *
 * The side effect to run is selected by the side_effect field of the
 * option's entry in cqpoptions. An unknown side-effect number is reported
 * on stderr and then trips an assertion.
 *
 * @param opt  The option that has just been set (index into the cqpoptions array).
 *
 * TODO This use of integer indexes as the pass from parse_options is very messy....
 */
void
execute_side_effects(int opt)
{
  switch (cqpoptions[opt].side_effect) {
  case 0:  /* <no side effect> */
    break;
  case 1:  /* set Registry "..."; */
    check_available_corpora(SYSTEM);
    break;
  case 2:  /* set DataDirectory "..."; */
    check_available_corpora(SUB);
    break;
  case 3:  /* set Optimize (on | off); */
    cl_set_optimize(query_optimize); /* enable / disable CL optimisations, too */
    break;
  case 4:  /* set CLDebug (on | off); */
    cl_set_debug_level(activate_cl_debug); /* enable / disable CL debugging */
    break;

    /* slot 5 is free */

  case 6:  /* set PrintMode (ascii | sgml | html | latex); */
    /* an unset mode string is treated as "ascii" */
    if (printModeString == NULL || strcasecmp(printModeString, "ascii") == 0)
      GlobalPrintMode = PrintASCII;
    else if (strcasecmp(printModeString, "sgml") == 0)
      GlobalPrintMode = PrintSGML;
    else if (strcasecmp(printModeString, "html") == 0)
      GlobalPrintMode = PrintHTML;
    else if (strcasecmp(printModeString, "latex") == 0)
      GlobalPrintMode = PrintLATEX;
    else {
      /* unknown mode: complain and fall back to ASCII */
      cqpmessage(Error, "USAGE: set PrintMode (ascii | sgml | html | latex);");
      GlobalPrintMode = PrintASCII;
      cl_free(printModeString);
      printModeString = cl_strdup("ascii");
    }
    break;

  case 7:  /* set PrintStructures "..."; */
    /* discard the old list before computing the new one */
    if (CD.printStructureTags) {
      DestroyAttributeList(&CD.printStructureTags);
    }
    CD.printStructureTags = ComputePrintStructures(current_corpus);
    break;

  case 8:  /* set PrintOptions "...."; */
    ParsePrintOptions();
    break;

  case 9:  /* set MatchingStrategy ( traditional | shortest | standard | longest ); */
    {
      int recognised = 1;

      /* guard against an unset name: strcasecmp() must not be passed NULL */
      if (matching_strategy_name == NULL) {
        recognised = 0;
      }
      else if (strcasecmp(matching_strategy_name, "traditional") == 0) {
        matching_strategy = traditional;
      }
      else if (strcasecmp(matching_strategy_name, "shortest") == 0) {
        matching_strategy = shortest_match;
      }
      else if (strcasecmp(matching_strategy_name, "standard") == 0) {
        matching_strategy = standard_match;
      }
      else if (strcasecmp(matching_strategy_name, "longest") == 0) {
        matching_strategy = longest_match;
      }
      else {
        recognised = 0;
      }

      if (!recognised) {
        /* unknown strategy: complain and fall back to the standard one */
        cqpmessage(Error, "USAGE: set MatchingStrategy (traditional | shortest | standard | longest);");
        matching_strategy = standard_match;
        cl_free(matching_strategy_name);
        /* use cl_strdup() like the PrintMode fallback above, not raw strdup() */
        matching_strategy_name = cl_strdup("standard");
      }
    }
    break;

  default:
    fprintf(stderr, "Unknown side-effect #%d invoked by option %s.\n",
            cqpoptions[opt].side_effect, cqpoptions[opt].opt_name);
    assert(0 && "Aborted. Please contact technical support.");
  }
}
Example #22
0
/**
 * Expands '~' and '$NAME' references in a file name.
 *
 * Every '~' in the input is replaced by the value of the HOME environment
 * variable; if HOME is not set, the '~' is copied literally. Every '$NAME'
 * (NAME being a run of alphanumeric characters and '_') is replaced by the
 * value of the environment variable NAME; if that variable is not set, a
 * warning is printed on stderr and the reference is copied literally.
 *
 * The expanded name is limited to CL_MAX_FILENAME_LENGTH - 1 characters.
 * Anything beyond that is dropped (with a warning on stderr) instead of
 * being written past the end of the internal buffer.
 *
 * @param fname  The file name to expand.
 * @return       The expanded name; the return value is a newly-allocated
 *               string (created with cl_strdup()).
 */
char *
expand_filename(char *fname)
{
  char fn[CL_MAX_FILENAME_LENGTH];      /* expansion buffer */
  char rname[CL_MAX_LINE_LENGTH];       /* name of a referenced environment variable */
  const size_t fn_max = sizeof(fn) - 1; /* keep one byte for the terminating NUL */
  size_t s = 0;                         /* read position in fname */
  size_t t = 0;                         /* write position in fn */
  int truncated = 0;
  char *home;

  while (fname[s]) {
    const char *insert;                 /* text to splice into the output */

    if (fname[s] == '~' && (home = getenv("HOME")) != NULL) {
      insert = home;
      s++;
    }
    else if (fname[s] == '$') {

      /* reference to an environment variable */

      size_t rpos = 0;
      char *reference;

      s++;                        /* skip the $ */

      /* <ctype.h> functions require an unsigned char value */
      while (isalnum((unsigned char)fname[s]) || fname[s] == '_') {
        if (rpos < sizeof(rname) - 1)
          rname[rpos++] = fname[s];
        s++;
      }
      rname[rpos] = '\0';

      reference = getenv(rname);

      if (reference == NULL) {
        fprintf(stderr, "options: can't get value of environment variable ``%s''\n", rname);

        /* keep the unresolved reference verbatim: '$' followed by the name */
        if (t < fn_max)
          fn[t++] = '$';
        else
          truncated = 1;
        reference = &rname[0];
      }
      insert = reference;
    }
    else {
      /* ordinary character: copy and move on */
      if (t < fn_max)
        fn[t++] = fname[s];
      else
        truncated = 1;
      s++;
      continue;
    }

    /* append the replacement text, never writing beyond the buffer */
    for ( ; *insert; insert++) {
      if (t >= fn_max) {
        truncated = 1;
        break;
      }
      fn[t++] = *insert;
    }
  }

  fn[t] = '\0';

  if (truncated)
    fprintf(stderr, "options: expanded file name is too long, truncated to %lu characters\n",
            (unsigned long)fn_max);

  return cl_strdup(fn);
}
Example #23
0
/**
 * Main function for cwb-align-encode.
 *
 * Reads a .align file (files ending in .gz are read through a gzip pipe),
 * checks its header against the two corpora named there, and writes the
 * alignment regions to an .alx file, plus an .alg file in compatibility
 * mode. 0:1 and 1:0 alignments are skipped; source regions must be in
 * ascending order.
 *
 * Errors are reported with Rprintf() followed by rcqp_receive_error(1).
 * NOTE(review): the code assumes rcqp_receive_error() does not return -- confirm.
 *
 * @param argc   Number of command-line arguments.
 * @param argv   Command-line arguments.
 * @return       0 on success.
 */
int
main(int argc, char *argv[])
{
  int argindex;                         /* index of first argument in argv[] */

  char *align_name = NULL;              /* name of the .align file */
  FILE *af = NULL;                      /* alignment file handle */
  int af_is_pipe;                       /* need to know whether to call fclose() or pclose() */
  char alx_name[CL_MAX_LINE_LENGTH];    /* full pathname of .alx file */
  char alg_name[CL_MAX_LINE_LENGTH];    /* full pathname of optional .alg file */
  FILE *alx=NULL, *alg=NULL;            /* file handles for .alx and optional .alg file */

  char line[CL_MAX_LINE_LENGTH];        /* one line of input from <infile> */

  char corpus1_name[CL_MAX_FILENAME_LENGTH];
  char corpus2_name[CL_MAX_FILENAME_LENGTH];
  char s1_name[CL_MAX_FILENAME_LENGTH];
  char s2_name[CL_MAX_FILENAME_LENGTH];
  Corpus *corpus1, *corpus2;            /* corpus handles */
  Attribute *w1, *w2;                   /* attribute handles for 'word' attributes; used to determine corpus size */
  int size1, size2;                     /* size of source & target corpus */

  Corpus *source_corpus;                /* encode alignment in this corpus (depends on -R flag, important for -D option) */
  char *source_corpus_name;             /* just for error messages */
  char *attribute_name;                 /* name of alignment attribute (depends on -R flag, must be lowercase) */

  int f1,l1,f2,l2;                      /* alignment regions */
  int current1, current2;
  int mark, n_0_1, n_1_0;

  int l;

  progname = argv[0];

  /* parse command line and read arguments */
  argindex = alignencode_parse_args(argc, argv, 1);
  align_name = argv[argindex];

  /* open alignment file and parse header; .gz files are automatically decompressed */
  af_is_pipe = 0;
  l = strlen(align_name);
  if ((l > 3) && (strncasecmp(align_name + l - 3, ".gz", 3) == 0)) {
    /* "gzip -cd " is 9 characters, plus the file name, plus the terminating NUL */
    char *pipe_cmd = (char *) cl_malloc(l+10);
    /* NOTE(review): the file name is passed to the shell unquoted -- names with
       shell metacharacters will be interpreted by the shell */
    sprintf(pipe_cmd, "gzip -cd %s", align_name); /* write .gz file through gzip pipe */
    af = popen(pipe_cmd, "r");
    if (af == NULL) {
      perror(pipe_cmd);
      Rprintf("%s: can't read compressed file %s\n", progname, align_name);
      rcqp_receive_error(1);
    }
    af_is_pipe = 1;
    cl_free(pipe_cmd);
  }
  else {
    af = fopen(align_name, "r");
    if (af == NULL) {
      perror(align_name);
      Rprintf("%s: can't read file %s\n", progname, align_name);
      rcqp_receive_error(1);
    }
  }

  /* read header = first line */
  if (NULL == fgets(line, CL_MAX_LINE_LENGTH, af))
    line[0] = '\0';             /* empty file or read error: let the header check below report it */
  if (4 != sscanf(line, "%s %s %s %s", corpus1_name, s1_name, corpus2_name, s2_name)) {
    Rprintf("%s: %s not in .align format\n", progname, align_name);
    Rprintf("wrong header: %s", line);
    rcqp_receive_error(1);
  }
  if (verbose) {
    if (reverse)
      Rprintf("Encoding alignment for [%s, %s] from file %s\n", corpus2_name, corpus1_name, align_name);
    else
      Rprintf("Encoding alignment for [%s, %s] from file %s\n", corpus1_name, corpus2_name, align_name);
  }

  /* open corpora and determine their sizes (for validity checks and compatibility mode) */
  if (NULL == (corpus1 = cl_new_corpus(registry_dir, corpus1_name))) {
    Rprintf("%s: can't open corpus %s\n", progname, corpus1_name);
    rcqp_receive_error(1);
  }
  if (NULL == (corpus2 = cl_new_corpus(registry_dir, corpus2_name))) {
    Rprintf("%s: can't open corpus %s\n", progname, corpus2_name);
    rcqp_receive_error(1);
  }
  if (NULL == (w1 = cl_new_attribute(corpus1, "word", ATT_POS))) {
    Rprintf("%s: can't open p-attribute %s.word\n", progname, corpus1_name);
    rcqp_receive_error(1);
  }
  if (NULL == (w2 = cl_new_attribute(corpus2, "word", ATT_POS))) {
    Rprintf("%s: can't open p-attribute %s.word\n", progname, corpus2_name);
    rcqp_receive_error(1);
  }

  size1 = cl_max_cpos(w1);
  if (size1 <= 0) {
    Rprintf("%s: data access error (%s.word)\n", progname, corpus1_name);
    rcqp_receive_error(1);
  }
  size2 = cl_max_cpos(w2);
  if (size2 <= 0) {
    Rprintf("%s: data access error (%s.word)\n", progname, corpus2_name);
    rcqp_receive_error(1);
  }

  /* now work out the actual source corpus and the alignment attribute name (depending on -R flag) */
  source_corpus = (reverse) ? corpus2 : corpus1;
  source_corpus_name = (reverse) ? corpus2_name : corpus1_name;
  attribute_name = cl_strdup((reverse) ? corpus1_name : corpus2_name);
  cl_id_tolower(attribute_name); /* fold attribute name to lowercase */

  /* with -D option, determine data file name(s) from actual source corpus;
     otherwise use directory specified with -d and the usual naming conventions */
  if (data_dir_from_corpus) {
    Attribute *alignment = cl_new_attribute(source_corpus, attribute_name, ATT_ALIGN);
    char *comp_pathname;

    if (alignment == NULL) {
      Rprintf("%s: alignment attribute %s.%s not declared in registry file\n",
              progname, source_corpus_name, attribute_name);
      rcqp_receive_error(1);
    }
    comp_pathname = component_full_name(alignment, CompXAlignData, NULL);
    if (comp_pathname == NULL) {
      Rprintf("%s: can't determine pathname for .alx file (internal error)\n", progname);
      rcqp_receive_error(1);
    }
    /* need to copy because component_full_name() returns pointer to internal buffer;
       the copy is bounded by the size of the destination */
    snprintf(alx_name, sizeof(alx_name), "%s", comp_pathname);
    if (compatibility) {
      comp_pathname = component_full_name(alignment, CompAlignData, NULL);
      if (comp_pathname == NULL) {
        Rprintf("%s: can't determine pathname for .alg file (internal error)\n", progname);
        rcqp_receive_error(1);
      }
      snprintf(alg_name, sizeof(alg_name), "%s", comp_pathname);
    }
  }
  else {
    snprintf(alx_name, sizeof(alx_name), "%s" SUBDIR_SEP_STRING "%s.alx", data_dir, attribute_name);
    if (compatibility)
      snprintf(alg_name, sizeof(alg_name), "%s" SUBDIR_SEP_STRING "%s.alg", data_dir, attribute_name);
  }

  /* now open output file(s) */
  alx = fopen(alx_name, "wb");
  if (alx == NULL) {
    perror(alx_name);
    Rprintf("%s: can't write file %s\n", progname, alx_name);
    rcqp_receive_error(1);
  }
  if (verbose)
    Rprintf("Writing file %s ...\n", alx_name);

  if (compatibility) {
    alg = fopen(alg_name, "wb");
    if (alg == NULL) {
      perror(alg_name);
      Rprintf("%s: can't write file %s\n", progname, alg_name);
      rcqp_receive_error(1);
    }

    if (verbose)
      Rprintf("Writing file %s ...\n", alg_name);
  }

  /* main encoding loop */
  f1 = f2 = l1 = l2 = 0;
  mark = -1;                        /* check that regions occur in ascending order */
  current1 = current2 = -1;         /* for compatibility mode */
  n_0_1 = n_1_0 = 0;                /* number of 0:1 and 1:0 alignments, which are skipped */
  while (! feof(af)) {
    if (NULL == fgets(line, CL_MAX_LINE_LENGTH, af))
      break;                        /* end of file (or read error, which we choose to ignore) */
    if (4 != sscanf(line, "%d %d %d %d", &f1, &l1, &f2, &l2)) {
      Rprintf("%s: input format error: %s", progname, line);
      rcqp_receive_error(1);
    }

    /* skip 0:1 and 1:0 alignments */
    if (l1 < f1) {
      n_0_1++; continue;
    }
    if (l2 < f2) {
      n_1_0++; continue;
    }

    /* check that source regions are non-overlapping and in ascending order */
    if (((reverse) ? f2 : f1) <= mark) {
      Rprintf("%s: source regions of alignment must be in ascending order\n", progname);
      Rprintf("Last region was [*, %d]; current is [%d, %d].\n", mark, f1, l1);
      Rprintf("Aborted.\n");
      rcqp_receive_error(1);
    }
    mark = (reverse) ? l2 : l1;

    /* write alignment region to .alx file (source region first) */
    if (reverse) {
      NwriteInt(f2, alx); NwriteInt(l2, alx);
      NwriteInt(f1, alx); NwriteInt(l1, alx);
    }
    else {
      NwriteInt(f1, alx); NwriteInt(l1, alx);
      NwriteInt(f2, alx); NwriteInt(l2, alx);
    }

    if (compatibility) {
      /* source and target regions of .alg file must be contiguous; store start points only; */
      /* hence we must collapse crossing alignments into one larger region */
      if ((f1 > current1) && (f2 > current2)) {
        if (reverse) {
          NwriteInt(f2, alg); NwriteInt(f1, alg);
        }
        else {
          NwriteInt(f1, alg); NwriteInt(f2, alg);
        }
        current1 = f1;
        current2 = f2;
      }
    }
  }
  if (compatibility) {
    if (reverse) {
      NwriteInt(size2, alg); NwriteInt(size1, alg); /* end of corpus alignment point*/
    }
    else {
      NwriteInt(size1, alg); NwriteInt(size2, alg); /* end of corpus alignment point*/
    }
  }

  if (verbose) {
    Rprintf("I skipped %d 0:1 alignments and %d 1:0 alignments.\n", n_0_1, n_1_0);
  }

  /* that's it; close file handles */
  fclose(alx);
  if (compatibility)
    fclose(alg);

  if (af_is_pipe)
    pclose(af);
  else
    fclose(af);

  /* the attribute name was duplicated above and is no longer needed */
  cl_free(attribute_name);

  return 0;
}
Example #24
0
File: tree.c Project: rforge/rcwb
/**
 * Converts an evaluation tree to a string.
 *
 * This is done by traversing the tree in
 * infix order.
 *
 * Leaves are rendered as their pattern index in double quotes; inner nodes
 * combine the strings of their children (concatenation, disjunction,
 * repetition). The strings of the children are freed once they have been
 * copied into the result.
 *
 * @param etptr   The evaluation tree to convert.
 * @param length  Number of bytes reserved for the returned string is placed
 *                here. This includes the terminating NUL and may be larger
 *                than strlen(result) + 1. Set to 0 when NULL is returned.
 * @return        The resulting (newly allocated) string; NULL if etptr is
 *                NULL or the node's operator is not handled here.
 */
char *
evaltree2searchstr(Evaltree etptr, int *length)
{
  int n, p, l, min, max, remain;
  char numstr[32];              /* large enough for any int plus quotes and blanks */


  char *left, *right, *result;
  int len_l, len_r;

  result = NULL;
  *length = 0;

  if(etptr != NULL) {
    if (etptr->node.type == node) {

      switch(etptr->node.op_id) {

      case re_od_concat:
      case re_oi_concat:
        /* NOTE(review): the same assertion appears twice; the second one
           may have been meant to check node.max -- confirm */
        assert(etptr->node.min == repeat_none);
        assert(etptr->node.min == repeat_none);

        left = evaltree2searchstr(etptr->node.left, &len_l);
        right = evaltree2searchstr(etptr->node.right, &len_r);
        *length = len_l + len_r + 1;
        result = (char *)cl_malloc(*length);
        sprintf(result, "%s %s", left, right);
        cl_free(left);
        cl_free(right);
        break;

      case re_disj:
        /* NOTE(review): duplicated assertion, see the concatenation case */
        assert(etptr->node.min == repeat_none);
        assert(etptr->node.min == repeat_none);

        left = evaltree2searchstr(etptr->node.left, &len_l);
        right = evaltree2searchstr(etptr->node.right, &len_r);
        *length = len_l + len_r + 7;
        result = (char *)cl_malloc(*length);
        sprintf(result, "( %s | %s )", left, right);
        cl_free(left);
        cl_free(right);
        break;

      case re_repeat:
        assert(etptr->node.min != repeat_none);

        left = evaltree2searchstr(etptr->node.left, &len_l);

        min = etptr->node.min;
        max = etptr->node.max;

        /* check the special cases first
         */

        if ((min == 0) && (max == repeat_inf)) {

          *length = len_l + 5;
          result = (char *)cl_malloc(*length);
          sprintf(result, "( %s )*", left);
          cl_free(left);

        }
        else if ((min == 1) && (max == repeat_inf)) {

          *length = len_l + 5;
          result = (char *)cl_malloc(*length);
          sprintf(result, "( %s )+", left);
          cl_free(left);

        }
        else if ((min == 0) && (max == 1)) {

          *length = len_l + 4;
          result = (char *)cl_malloc(*length);
          sprintf(result, "[ %s ]", left);
          cl_free(left);

        }
        else {

          if (max == repeat_inf)
            remain = repeat_inf;
          else
            remain = max - min;


          /* we need
           *   min * (len_l + 1) space for the minimum repetitions
           * plus
           *   if max != inf:  max - min * (len_l + 4)
           *   else:           len_l + 5
           * space for the string, plus one byte for the terminating NUL
           * (without it, a {0,0} repetition would reserve no memory at all
           * and report a length of 0 to the enclosing node).
           */

          *length = min * (len_l + 1);


          if (remain == repeat_inf)
            *length = *length + len_l + 5;
          else
            *length = *length + (remain * (len_l + 4));

          *length = *length + 1;

          result = (char *)cl_malloc(*length);


          p = 0;                /* the pointer in result */


          /*
           * copy the minimum repetitions
           */

          for(n = 0; n < min; n++) {

            for (l = 0; left[l]; l++) {
              result[p] = left[l];
              p++;
            }
            result[p++] = ' ';
          }

          if (remain == repeat_inf) {

            /* unbounded rest: "( <left> )*" */
            result[p++] = '(';
            result[p++] = ' ';

            for (l = 0; left[l]; l++) {
              result[p] = left[l];
              p++;
            }

            result[p++] = ' ';
            result[p++] = ')';
            result[p++] = '*';

          }
          else {

            /* bounded rest: nested optional parts "[<left> [<left> ... ]]" */
            for (n = 0; n < remain; n++) {
              result[p++] = '[';
              for (l = 0; left[l]; l++) {
                result[p] = left[l];
                p++;
              }
              result[p++] = ' ';
            }

            for (n = 0; n < remain; n++)
              result[p++] = ']';
          }
          result[p] = '\0';
          cl_free(left);
        }
        break;
      }
    }
    else {
      assert(etptr->leaf.type == leaf);

      /* bounded formatting: the pattern index may have any number of digits */
      snprintf(numstr, sizeof(numstr), " \"%d\" ", etptr->leaf.patindex);
      result = cl_strdup(numstr);
      *length = strlen(result) + 1;

    }
  }
  return result;
}
Example #25
0
/**
 * Builds a set attribute value from an input string.
 *
 * @param s      The input string (not modified; the function works on a copy).
 * @param split  Boolean; if True, s is split on whitespace (blank, TAB, newline).
 *               If False, s must already be in '|'-delimited format, i.e.
 *               start and end with a '|' character.
 * @return       Newly allocated set attribute value in standard syntax
 *               ('|' delimited, elements sorted with cl_string_list_qsort()).
 *               On a syntax error NULL is returned and cl_errno is set to
 *               CDA_EFSETINV; otherwise cl_errno is CDA_OK.
 */
char *
cl_make_set(char *s, int split)
{
  char *buffer = cl_strdup(s);                       /* private, writable copy of <s> */
  cl_string_list elements = cl_new_string_list();    /* set elements, pointing into <buffer> */
  int valid = 0;                                     /* becomes true once the input has been accepted */
  char *cursor, *token, *result;
  int k, n_elements, total;

  cl_errno = CDA_OK;

  /* (1) cut the input into set elements */
  if (split) {
    /* whitespace-separated tokens */
    cursor = buffer;
    while (*cursor != 0) {
      /* skip the whitespace in front of the next token */
      while (*cursor == ' ' || *cursor == '\t' || *cursor == '\n')
        cursor++;
      token = cursor;
      /* run to the end of the token */
      while (*cursor != 0 && *cursor != ' ' && *cursor != '\t' && *cursor != '\n')
        cursor++;
      /* terminate the token unless it ends at the end of the string,
         in which case the outer loop stops by itself */
      if (*cursor != 0)
        *cursor++ = 0;
      /* trailing whitespace produces an empty token, which is dropped */
      if (cursor != token)
        cl_string_list_append(elements, token);
    }
    valid = 1;                  /* splitting on whitespace cannot fail */
  }
  else if (buffer[0] == '|') {
    /* '|'-delimited syntax: every element is terminated by a '|' */
    token = buffer + 1;
    for (cursor = token; *cursor != 0; cursor++) {
      if (*cursor == '|') {
        *cursor = 0;
        cl_string_list_append(elements, token);
        token = cursor + 1;
      }
    }
    /* accepted only if nothing is left over after the last '|' */
    valid = (cursor == token);
  }

  /* (2) no element may contain the delimiter character */
  n_elements = cl_string_list_size(elements);
  for (k = 0; k < n_elements; k++) {
    if (strchr(cl_string_list_get(elements, k), '|') != NULL)
      valid = 0;
  }

  /* (3) give up on any error */
  if (!valid) {
    cl_delete_string_list(elements);
    cl_free(buffer);
    cl_errno = CDA_EFSETINV;
    return NULL;
  }

  /* (4) bring the elements into sort order (for unify() function) */
  cl_string_list_qsort(elements);

  /* (5) assemble the set string: leading '|', then each element followed by '|' */
  total = 2;                    /* leading '|' and terminating NUL */
  for (k = 0; k < n_elements; k++)
    total += strlen(cl_string_list_get(elements, k)) + 1;

  result = cl_malloc(total);    /* exactly as large as needed */
  cursor = result;
  *cursor++ = '|';
  for (k = 0; k < n_elements; k++) {
    char *item = cl_string_list_get(elements, k);
    size_t item_len = strlen(item);

    memcpy(cursor, item, item_len);
    cursor += item_len;
    *cursor++ = '|';
  }
  *cursor = 0;                  /* end of string */

  /* (6) release the working data and hand back the set string */
  cl_delete_string_list(elements);
  cl_free(buffer);
  return result;
}
Example #26
0
/**
 * Initializes the path of an attribute Component.
 *
 * This function starts with the path it is passed, and then evaluates variables
 * in the form $UPPERCASE. The resulting path is assigned to the specified
 * entry in the component array for the given Attribute.
 *
 * Recognised variables are $HOME (read from the environment), $APATH (the
 * attribute's own path or, if that is unset, the path of its mother corpus),
 * $ANAME (the attribute's name) and the name of any component known to
 * find_cid_name(), which is expanded by a recursive call. A variable that
 * cannot be resolved is replaced by its own name (without the '$') after a
 * warning has been printed.
 *
 * Note that if it is called for a Component that does not yet exist, this function
 * creates the component by calling declare_component().
 *
 * The expanded path is assembled in a local scratch buffer and copied into the
 * static result buffer only once it is complete. A recursively resolved
 * reference is itself handed back in that static buffer, so writing into it
 * while the expansion is still in progress would overwrite text that has yet
 * to be read.
 *
 * @see declare_component
 * @see Component_Field_Specs
 * @param attribute            The Attribute object to work with.
 * @param cid                  The identifier of the Component to which the path is to
 *                             be added.
 * @param path                 The path to assign to the component. Can be NULL,
 *                             in which case, the default path from Component_Field_Specs
 *                             is used.
 * @return                     Pointer to this function's static buffer for creating the
 *                             path (NB: NOT to the path in the actual component! which
 *                             is a copy). If a path already exists, a pointer to that
 *                             path. NULL in case of error in Component_Field_Specs, or
 *                             if the expanded path (or a variable name in it) does not
 *                             fit into CL_MAX_LINE_LENGTH bytes.
 */
char *
component_full_name(Attribute *attribute, ComponentID cid, char *path)
{
  component_field_spec *compspec;
  Component *component;

  static char buf[CL_MAX_LINE_LENGTH];  /* result buffer handed back to the caller */
  char work[CL_MAX_LINE_LENGTH];        /* scratch buffer in which the path is assembled */
  char rname[CL_MAX_LINE_LENGTH];       /* name of the $VARIABLE currently being expanded */
  char *reference;
  char c;

  int ppos, bpos, rpos;


  /*  did we do the job before? */

  if ((component = attribute->any.components[cid]) != NULL &&
      (component->path != NULL))
    return component->path;

  /*  yet undeclared. So try to guess the name: */

  compspec = NULL;

  if (path == NULL) {
    if ((compspec = find_cid_id(cid)) == NULL) {
      fprintf(stderr, "attributes:component_full_name(): Warning:\n"
              "  can't find component table entry for Component #%d\n", cid);
      return NULL;
    }
    path = compspec->default_path;
  }

  /* index in string "path" */
  ppos = 0;
  /* index in string "work" */
  bpos = 0;
  work[bpos] = '\0';

  while ((c = path[ppos]) != '\0') {

    if (c == '$') {

      /*  reference to the name of another component. */

      rpos = 0;
      c = path[++ppos];         /* first skip the '$' */
      /* the cast keeps isupper() well-defined for bytes >= 0x80 when char is signed */
      while (isupper((unsigned char) c)) {
        if (rpos >= CL_MAX_LINE_LENGTH - 1)
          goto too_long;        /* keep one byte free for the EOS mark */
        rname[rpos++] = c;
        c = path[++ppos];
      }
      rname[rpos] = '\0';

      /* ppos now points to the first character after the reference
       * rname holds the UPPERCASE name of the referenced component
       */

      reference = NULL;

      if (STREQ(rname, "HOME"))
        reference = getenv(rname);
      else if (STREQ(rname, "APATH"))
        reference = (attribute->any.path ? attribute->any.path
                     : attribute->any.mother->path);
      else if (STREQ(rname, "ANAME"))
        reference = attribute->any.name;
      else if ((compspec = find_cid_name(rname)) != NULL)
        reference = component_full_name(attribute, compspec->id, NULL);

      if (reference == NULL) {
        fprintf(stderr, "attributes:component_full_name(): Warning:\n"
                "  Can't reference to the value of %s -- copying\n",
                rname);
        reference = rname;
      }

      /* append the expansion; reference may point into the static buf,
       * which is why the output goes to the separate scratch buffer */
      for (rpos = 0; reference[rpos] != '\0'; rpos++) {
        if (bpos >= CL_MAX_LINE_LENGTH - 1)
          goto too_long;
        work[bpos++] = reference[rpos];
      }
    }
    else {
      /* just copy the character and scroll */
      if (bpos >= CL_MAX_LINE_LENGTH - 1)
        goto too_long;
      work[bpos++] = c;
      ppos++;
    }
  }
  work[bpos] = '\0';

  /* publish the finished path in the static result buffer */
  strcpy(buf, work);

  if (component != NULL)
    component->path = (char *)cl_strdup(buf);
  else
    (void) declare_component(attribute, cid, buf);

  /*  and return it */
  return &buf[0];
  /* NB: the static buffer is returned here, not component->path as in the
   * shortcut at the top of the function; this is the documented behaviour. */

too_long:
  fprintf(stderr, "attributes:component_full_name(): Warning:\n"
          "  expanded path for Component #%d exceeds %d characters\n",
          cid, CL_MAX_LINE_LENGTH - 1);
  return NULL;
}
Example #27
0
/**
 * Creates a Mapping from a file.
 *
 * Each line in the file results in a SingleMapping (unless it begins in #,
 * in which case it either indicates the name of the mapping or is a comment).
 *
 * Within a single line, the first white-space delimited token represents
 * the name of the class, and the other tokens are attribute values.
 * Every attribute value must exist in the lexicon of the attribute and
 * may belong to one class only; a class name may be defined only once.
 *
 * Any parse failure in the file will stop the entire Mapping-creation process
 * and result in NULL being returned.
 *
 * @param corpus        The corpus for which the Mapping is valid (pointer).
 * @param attr_name     String naming the attribute for which the mapping is valid.
 * @param file_name     The filename of the map spec.
 * @param error_string  A char * (not char[]), which is set to an error
 *                      string, or to NULL if all is OK. The error strings
 *                      are string literals and must not be freed.
 * @return              The resulting Mapping object, or NULL in case of error.
 */
Mapping
read_mapping(Corpus *corpus,
             char *attr_name,
             char *file_name,
             char **error_string)
{
  FILE *fd;
  Attribute *attr;
  Mapping m = NULL;
  char s[CL_MAX_LINE_LENGTH];

  if (corpus == NULL) {
    *error_string = "corpus argument missing";
    return NULL;
  }

  if (attr_name == NULL) {
    *error_string = "attribute name argument missing";
    return NULL;
  }

  if (file_name == NULL) {
    *error_string = "file name argument missing";
    return NULL;
  }

  if ((attr = find_attribute(corpus, attr_name, ATT_POS, NULL)) == NULL) {
    *error_string = "no such attribute in corpus";
    return NULL;
  }

  if ((fd = fopen(file_name, "r")) == NULL) {
    *error_string = "Can't open mapping file";
    return NULL;
  }

  m = cl_malloc(sizeof(MappingRecord));

  m->corpus = corpus;
  m->mapping_name = NULL;
  m->attribute = attr;          /* known to be non-NULL, see check above */
  m->nr_classes = 0;
  m->classes = NULL;

  /* Every error path below calls drop_mapping(&m); as in the loop conditions,
   * m is then expected to be NULL, which terminates processing. */

  while (m && fgets(s, CL_MAX_LINE_LENGTH, fd) != NULL) {

    char *token;
    SingleMappingRecord *this_class;
    size_t len = strlen(s);

    /* strip the trailing newline left in place by fgets() */
    if (len > 0 && s[len - 1] == '\n')
      s[len - 1] = '\0';

    if (s[0] == '#') {

      /* lines beginning with # : either the NAME_TOKEN declaration ... */
      if (strncasecmp(s, NAME_TOKEN, strlen(NAME_TOKEN)) == 0) {

        if (m->mapping_name) {
          *error_string = "Multiple mapping names declared";
          drop_mapping(&m);
        }
        else if (!s[strlen(NAME_TOKEN)]) {
          *error_string = "Error in #NAME declaration";
          drop_mapping(&m);
        }
        else {
          m->mapping_name = cl_strdup(s + strlen(NAME_TOKEN));
        }
      }

      /* ... or a comment, which is ignored */
      continue;
    }

    /* lines NOT beginning with # : first token is the class name,
     * the rest are attribute values */

    token = strtok(s, " \t\n");
    if (token == NULL)
      continue;                 /* empty or all-whitespace line */

    /* test: class 'token' already defined? */
    if (find_mapping(m, token) != NULL) {
      *error_string = "Class defined twice";
      drop_mapping(&m);
      break;
    }

    /* make room for the new class; the array grows in steps of
     * CLASS_REALLOC_THRESHOLD entries */
    if (m->nr_classes == 0) {
      m->classes =
        (SingleMappingRecord *)
        cl_malloc(sizeof(SingleMappingRecord) * CLASS_REALLOC_THRESHOLD);
    }
    else if (m->nr_classes % CLASS_REALLOC_THRESHOLD == 0) {
      m->classes =
        (SingleMappingRecord *)
        cl_realloc(m->classes,
                   sizeof(SingleMappingRecord) *
                   (m->nr_classes + CLASS_REALLOC_THRESHOLD));
    }
    /* else there is enough memory for this new class already! */

    if (m->classes == NULL) {
      *error_string = "Memory allocation failure";
      drop_mapping(&m);
      break;
    }

    /* the new class is filled in place but only counted in nr_classes
     * once the whole line has been processed successfully */
    this_class = &(m->classes[m->nr_classes]);
    this_class->class_name = cl_strdup(token);
    this_class->nr_tokens = 0;
    this_class->tokens = NULL;

    /* create single mappings : loop through remaining tokens on this line */

    while (m && (token = strtok(NULL, " \t\n"))) {

      int id;
      int i;
      int duplicate = 0;

      /* test: token member of attribute values of my attribute? */
      id = get_id_of_string(attr, token);

      if (id < 0 || cderrno != CDA_OK) {
        *error_string = "token not member of attribute";
        drop_mapping(&m);
        break;
      }

      /* test: token already member of any class? */
      if (map_token_to_class(m, token) != NULL) {
        *error_string = "token member of several classes";
        drop_mapping(&m);
        break;
      }

      /* test: token listed twice within the class being built? */
      for (i = 0; i < this_class->nr_tokens; i++) {
        if (this_class->tokens[i] == id) {
          duplicate = 1;
          break;
        }
      }
      if (duplicate) {
        *error_string = "token member of several classes";
        drop_mapping(&m);
        break;
      }

      /* having passed all the tests, put token id into this mapping;
       * the id array grows in steps of TOKEN_REALLOC_THRESHOLD entries */
      if (this_class->nr_tokens == 0) {
        this_class->tokens =
          (int *)
          cl_malloc(sizeof(int) * TOKEN_REALLOC_THRESHOLD);
      }
      else if (this_class->nr_tokens % TOKEN_REALLOC_THRESHOLD == 0) {
        this_class->tokens =
          (int *)
          cl_realloc(this_class->tokens,
                     sizeof(int) * (this_class->nr_tokens +
                                    TOKEN_REALLOC_THRESHOLD));
      }

      if (this_class->tokens == NULL) {
        *error_string = "Memory allocation failure";
        drop_mapping(&m);
        break;
      }

      this_class->tokens[this_class->nr_tokens] = id;
      this_class->nr_tokens++;

    } /* endwhile (loop for each token on a line) */

    if (m) {

      m->nr_classes++;

      /* sort token IDs in increasing order; skipped for classes with fewer
       * than two tokens, which also avoids passing a NULL array to qsort() */
      if (this_class->nr_tokens > 1)
        qsort(this_class->tokens,
              this_class->nr_tokens,
              sizeof(int),
              intcompare);
    }
  } /* endwhile (main loop for each line in the mapping file */

  fclose(fd);

  /* report success as documented: no error string */
  if (m)
    *error_string = NULL;

  return m;
}
Example #28
0
/**
 * Creates feature maps for a source/target corpus pair.
 *
 * This is the constructor function for the FMS class.
 *
 * Example usage:
 *
 * FMS = create_feature_maps(config_data, nr_of_config_lines, source_word, target_word, source_s, target_s);
 *
 * @param config              array of strings representing the feature map configuration.
 * @param config_lines        the number of configuration items stored in config_data.
 * @param w_attr1             The p-attribute in the first corpus to link.
 * @param w_attr2             The p-attribute in the second corpus to link.
 * @param s_attr1             The s-attribute in the first corpus to link.
 * @param s_attr2             The s-attribute in the second corpus to link.
 * @return                    the new FMS object.
 */
FMS
create_feature_maps(char **config,
                    int config_lines,
                    Attribute *w_attr1,
                    Attribute *w_attr2,
                    Attribute *s_attr1,
                    Attribute *s_attr2
                    ) 
{
  FMS r;

  unsigned int *fcount1, *fcount2;    /* arrays for types in the lexicons of the source
                                       * & target corpora, respectively, counting how often each is used
                                       * in a feature */

  int config_pointer;

  char *b, command[CL_MAX_LINE_LENGTH], dummy[CL_MAX_LINE_LENGTH];

  int current_feature;
  int weight;                         /* holds the weight assigned to the feature(s) we're working on */
  int need_to_abort;                  /* boolean used during pointer check */

  /* after we have counted up features, these will become arrays of ints, with one entry per feature */
  int *fs1, *fs2; 

  int i;
  int nw1;  /* number of types on the word-attribute of the source corpus */
  int nw2;  /* number of types on the word-attribute of the target corpus */

  /* one last variable: we need to know the character set of the two corpora for assorted purposes */
  CorpusCharset charset;
  charset = cl_corpus_charset(cl_attribute_mother_corpus(w_attr1));

  /* first, create the FMS object. */
  r = (FMS) malloc(sizeof(feature_maps_t));
  assert(r);

  /* copy in the attribute pointers */
  r->att1 = w_attr1;
  r->att2 = w_attr2;
  r->s1 = s_attr1;
  r->s2 = s_attr2;

  init_char_map();
  
  /* find out how many different word-types occur on each of the p-attributes */
  nw1 = cl_max_id(w_attr1);
  if (nw1 <= 0) {
    fprintf(stderr, "ERROR: can't access lexicon of source corpus\n");
    exit(1);
  }
  nw2 = cl_max_id(w_attr2);
  if (nw2 <= 0) {
    fprintf(stderr, "ERROR: can't access lexicon of target corpus\n");
    exit(1);
  }
  
  printf("LEXICON SIZE: %d / %d\n", nw1, nw2);

  fcount1 = (unsigned int*) calloc(nw1 + 1, sizeof(unsigned int));
  fcount2 = (unsigned int*) calloc(nw2 + 1, sizeof(unsigned int));

  r->n_features = 1;


  /* NOTE there are two passes through the creation of feature maps - two sets of nearly identical code!
   * First pass to see how many things we need ot count, second pass to count them. */

  /* process feature map configuration: first pass */
  for (config_pointer = 0; config_pointer < config_lines; config_pointer++) {

    /* strip newline and comments */
    if ( (b = strpbrk(config[config_pointer],"\n#")) )
      *b = 0;

    if (sscanf(config[config_pointer], "%s", command) > 0) {
      if(command[0] == '-') {
        /*
         * These are the FIRST PASS options for the different config lines.
         *
         * Possible config commands: -S -W -C -1 -2 -3 -4
         */
        switch(command[1]) {
        /* -S : the "shared words" type of feature */
        case 'S': {
          int i1, i2; /* i1 and i2 are temporary indexes into the lexicons of the two corpora */
          int f1, f2; /* f1 and f2 are temporary storage for frequencies from the corpus lexicons */
          float threshold;
          int n_shared = 0; /* numebr fo shared words - only calculated for the purpose of printing it */

          if(sscanf(config[config_pointer],"%2s:%d:%f %s",command,&weight,&threshold,dummy) != 3) {
            fprintf(stderr,"ERROR: wrong # of args: %s\n",config[config_pointer]);
            fprintf(stderr,"Usage: -S:<weight>:<threshold>\n");
            fprintf(stderr,"  Shared words with freq. ratios f1/(f1+f2) and f2/(f1+f2) >= <threshold>.\n");
            exit(1);
          }
          else {
            printf("FEATURE: Shared words, threshold=%4.1f%c, weight=%d ... ",threshold * 100, '\%', weight);
            fflush(stdout);

            /* for each type in target corpus, get its frequency, and the corresponding id and frequency
             * from the target corpus, then test whether it meets the criteria for use as a feature. */
            for (i1 = 0; i1 < nw1; i1++) {
              f1 = cl_id2freq(w_attr1, i1);
              i2 = cl_str2id(w_attr2, cl_id2str(w_attr1, i1));
              if (i2 >= 0){
                f2 = cl_id2freq(w_attr2, i2);
                /* if it will be used as a feature, increment counts of features in various places */
                if ( (f1 / (0.0+f1+f2)) >= threshold && (f2 / (0.0+f1+f2)) >= threshold){
                  fcount1[i1]++;
                  fcount2[i2]++;
                  n_shared++;
                  r->n_features++; 
                }
              }
            }
            printf("[%d]\n", n_shared);
          }
          break;
        }
        /* -1 to -4 : shared character sequences (of 1 letter to 4 letters in length) as features */
        case '1': 
        case '2':
        case '3':
        case '4': { 
          int n; /* length of the n-gram, obviously */
          
          if (sscanf(config[config_pointer], "%1s%d:%d %s", command, &n, &weight, dummy) !=3 ) {
            fprintf(stderr,"ERROR: wrong # of args: %s\n",config[config_pointer]);
            fprintf(stderr,"Usage: -<n>:<weight>  (n = 1..4)\n");
            fprintf(stderr,"  Shared <n>-grams (single characters, bigrams, trigrams, 4-grams).\n");
            exit(1);
          }
          else if(n <= 0 || n > 4) {
            /* this shouldn't happen anyway */
            fprintf(stderr,"ERROR: cannot handle %d-grams: %s\n", n, config[config_pointer]);
            exit(1);
          }
          else {
            int i,f,l; /* temp storage for lexicon index, n of possible features, && word length */
            char *s;

            printf("FEATURE: %d-grams, weight=%d ... ", n, weight);
            fflush(stdout);

            /* for each entry in source-corpus lexicon, add to the number of features IFF
             * that lexicon entry is longer than 4 characters */
            for(i = 0; i < nw1; i++) {
              /* l = cl_id2strlen(w_attr1, i); */
              s = (unsigned char *) cl_strdup(cl_id2str(w_attr1, i));
              cl_string_canonical( (char *)s, charset, IGNORE_CASE | IGNORE_DIAC);
              l = strlen(s);
              cl_free(s);
              fcount1[i] += (l >= n) ? l - n + 1 : 0;
            }
            /* same for target corpus */
            for(i = 0; i < nw2; i++) {
              /* l = cl_id2strlen(w_attr2, i); */
              s = (unsigned char *) cl_strdup(cl_id2str(w_attr2, i));
              cl_string_canonical( (char *)s, charset, IGNORE_CASE | IGNORE_DIAC);
              l = strlen(s);
              cl_free(s);
              fcount2[i] += (l >= n) ? l - n + 1 : 0;
            }
            /* set f to number of possible features (= number of possible characters to the power of n) */
            f = 1;
            for(i = 0 ; i < n; i++)
              f *= char_map_range;
            /* anmd add that to our total number of features! */
            r->n_features += f;
            printf("[%d]\n", f);
          }
          break;
        }
        /* -W: the word-translation-equivalence type of feature */
        case 'W': {
          char filename[CL_MAX_LINE_LENGTH],
            word1[CL_MAX_LINE_LENGTH],
            word2[CL_MAX_LINE_LENGTH];
          FILE *wordlist;
          int nw;      /* number of words scanned from an input line */
          int nl = 0;  /* counter for the number of lines in the wordlist file we have gone through */
          int i1,i2;   /* lexicon ids in source and target corpora */
          int n_matched = 0;  /* counter for n of lines in input file that can be used as a feature. */

          if(sscanf(config[config_pointer],"%2s:%d:%s %s",command,&weight,filename,dummy)!=3) {
            fprintf(stderr, "ERROR: wrong # of args: %s\n",config[config_pointer]);
            fprintf(stderr, "Usage: -W:<weight>:<filename>\n");
            fprintf(stderr, "  Word list (read from file <filename>).\n");
            exit(1);
          }
          else if(!(wordlist = fopen(filename,"r"))) {
            fprintf(stderr,"ERROR: Cannot read word list file %s.\n",
                    filename);
            exit(-1);
          }
          else {
            printf("FEATURE: word list %s, weight=%d ... ", filename, weight);
            fflush(stdout);
            while((nw = fscanf(wordlist,"%s %s",word1,word2))>0) {
              /* on first line of file, skip UTF8 byte-order-mark if present */
              if (nl == 0 && charset == utf8 && strlen(word1) > 3)
                if (word1[0] == (char)0xEF && word1[1] == (char)0xBB && word1[2] == (char)0xBF)
                   cl_strcpy(word1, (word1 + 3));
              nl++;
              /* check that both word 1 and word 2 are valid for the encoding of the corpora */
              if (! (cl_string_validate_encoding(word1, charset, 0)
                  && cl_string_validate_encoding(word2, charset, 0)) ) {
                fprintf(stderr, "ERROR: character encoding error in the word-list input file with the input word list.\n");
                fprintf(stderr, "       (The error occurs on line %d.)\n", nl);
                exit(1);
              }
              if (nw != 2)
                fprintf(stderr,"WARNING: Line %d in word list '%s' contains %d words, ignored.\n",nl,filename,nw);
              else {
                /* if word1 and word2 both occur in their respective corpora, this is a feature. */
                if(   (i1 = cl_str2id(w_attr1, word1)) >= 0
                   && (i2 = cl_str2id(w_attr2, word2)) >= 0 ) {
                  fcount1[i1]++;
                  fcount2[i2]++;
                  n_matched++;
                  r->n_features++;
                }
              }
            }
            fclose(wordlist);
            printf("[%d]\n", n_matched);
          }         
          break;
        }
        /* -C: the character count type of feature.
         * This feature exists for EVERY word type. */
        case 'C': 
          if(sscanf(config[config_pointer],"%2s:%d %s",command,&weight,dummy)!=2) {
            fprintf(stderr, "ERROR: wrong # of args: %s\n",config[config_pointer]);
            fprintf(stderr, "Usage: -C:<weight>\n");
            fprintf(stderr, "  Character count [primary feature].\n");
            exit(1);
          }
          else {
            /* primary feature -> don't create additional features */
            /* first entry in a token's feature list is character count */ 
            for (i=0; i<nw1; i++)
              fcount1[i]++;
            for (i=0; i<nw2; i++)
              fcount2[i]++;
            printf("FEATURE: character count, weight=%d ... [1]\n", weight);
          }
          break;
        default:
          fprintf(stderr,"ERROR: unknown feature: %s\n",config[config_pointer]);
          exit(1);
          break;
        }
      }
      else {
        fprintf(stderr,"ERROR: feature parse error: %s\n", config[config_pointer]);
        exit(1);
      }
    }
  }