Beispiel #1
0
/* Register the n-gram part of a PSU line with the current language's
   run-time signature set.

   The first whitespace-delimited token of PSULINE and the whitespace
   after it are skipped; the text from there up to the separator is the
   n-gram.  The n-gram is NUL-terminated in place (so PSULINE is
   modified), with trailing whitespace trimmed, and handed to
   psu_register().  A line without a separator is silently ignored.

   currlang->runsigs is allocated on first use.  */
void
lem_psu(char *psuline)
{
  char *psu_ngram, *psu_sig;
  
  if (!currlang->runsigs)
    {
      currlang->runsigs = mb_new(lemline_xcp->sigs->mb_sigsets);
      /* other runsigs initialization */
    }
  psu_ngram = psuline;
  /* <ctype.h> functions need an unsigned char value; plain char may be
     negative for non-ASCII bytes */
  while (*psu_ngram && !isspace((unsigned char)*psu_ngram))
    ++psu_ngram;
  while (isspace((unsigned char)*psu_ngram))
    ++psu_ngram;
  /* psu_sig used to be tested below without ever having been assigned,
     which is undefined behaviour.
     NOTE(review): the separator literal is not visible in this code;
     "=>" is assumed because the code below skips exactly two
     characters -- confirm against the PSU line syntax. */
  psu_sig = strstr(psu_ngram, "=>");
  if (psu_sig)
    {
      char *psu_ngram_end = psu_sig;
      while (psu_ngram_end > psu_ngram
             && isspace((unsigned char)psu_ngram_end[-1]))
        --psu_ngram_end;
      *psu_ngram_end = '\0';
      /* step over the separator and the whitespace that follows it;
         NOTE(review): the resulting psu_sig is not passed on -- the
         last argument to psu_register is NULL */
      psu_sig += 2;
      while (isspace((unsigned char)*psu_sig))
        ++psu_sig;
      psu_register(file,lnum,currlang->runsigs,currlang,psu_ngram,NULL);
    }
}
Beispiel #2
0
/* This routine should not set anything but FORM at the f2 level;
   that is the job of ilem_parse

   Allocate an ilem_form for FORMSTR, record where it came from (REF,
   LANGCON and the file-level `file' and `lnum') and, when REF is not
   empty, append it to curr_lsp's forms/cells arrays and index it by
   REF in word_form_index.

   When LANG contains "949" the form is flagged F2_FLAGS_LEM_BY_NORM;
   for a flagged form FORMSTR is stored as the norm and the form itself
   becomes "*".  REF, LANG and FORMSTR are stored by pointer, not
   copied; only the hash key is a pool copy of REF.

   The arrays grow 16 slots at a time.  If growing them fails the
   program is aborted: this routine has no way to report an error and
   would otherwise write through a NULL pointer. */
void
lem_save_form(const char *ref, const char *lang, 
              const char *formstr, struct lang_context *langcon)
{
  struct ilem_form *form = mb_new(lemline_xcp->sigs->mb_ilem_forms);
  extern int curr_cell;
  form->ref = (char*)ref;
  if (lang)
    {
      form->f2.lang = (unsigned char*)lang;
      form->f2.core = langcore_of(lang);
      if (strstr(lang,"949"))
        BIT_SET(form->f2.flags,F2_FLAGS_LEM_BY_NORM);
    }
  if (BIT_ISSET(form->f2.flags,F2_FLAGS_LEM_BY_NORM))
    {
      form->f2.norm = (unsigned char *)formstr;
      form->f2.form = (const unsigned char *)"*";
    }
  else
    form->f2.form = (unsigned char *)formstr;
  form->file = (char*)file;
  form->lnum = lnum;
  form->lang = langcon;

  /* a form without a reference is not registered anywhere */
  if (!ref || !ref[0])
    return;

  if (!curr_lsp->forms_alloced
      || curr_lsp->forms_used == curr_lsp->forms_alloced)
    {
      void *grown_forms, *grown_cells;

      curr_lsp->forms_alloced += 16;
      /* realloc into temporaries so a failure is detected before the
         old pointers are overwritten */
      grown_forms = realloc(curr_lsp->forms,
                            curr_lsp->forms_alloced*sizeof(struct ilem_form*));
      grown_cells = realloc(curr_lsp->cells,
                            curr_lsp->forms_alloced*sizeof(int));
      if (!grown_forms || !grown_cells)
        abort();
      curr_lsp->forms = grown_forms;
      curr_lsp->cells = grown_cells;
      if (curr_lsp->forms_used < 0)
        curr_lsp->forms_used = 0;
    }
  /* when curr_cell = 0 we are in a line with no cells; by definition,
     all content in such a line is in cell 2 (because cell 1 is the line
     number) */
  curr_lsp->cells[curr_lsp->forms_used] = (curr_cell ? curr_cell : 2);
  curr_lsp->forms[curr_lsp->forms_used++] = form;
  hash_add(word_form_index,npool_copy((unsigned char*)ref,lemline_xcp->pool),form);
}
Beispiel #3
0
/* Append STR to the NUL-terminated string in BUF (capacity CAP bytes,
   CAP >= 1), preceded by one space when BUF is not empty.  Whatever
   does not fit is dropped so BUF is never overrun; a NULL STR is
   ignored. */
static void
sif_append_word(char *buf, size_t cap, const char *str)
{
  size_t used, room, len;

  if (!str)
    return;
  used = strlen(buf);
  if (used && used + 1 < cap)
    {
      buf[used++] = ' ';
      buf[used] = '\0';
    }
  room = cap - used - 1;
  len = strlen(str);
  if (len > room)
    len = room;
  memcpy(buf + used, str, len);
  buf[used + len] = '\0';
}

/* Fill in the instance-level fields of MLP->matches[0].psu_form from
   the matches in MLP.

   form   space-separated forms of the matches; a match whose lp->ref
          equals the previous match's ref contributes no form and has
          F2_FLAGS_SAME_REF set on its f2 instead
   norm   space-separated norms of the matches that have one
   file, lnum   taken from the first match's ilem_form
   parts  array built by list2array from the first matching f2 of each
          match

   form and norm point into function-static buffers, so they are
   overwritten by the next call; text beyond the buffer size is
   truncated. */
static void
set_instance_fields(struct xcl_context *xc, struct ML *mlp)
{
  const char *lastw = "";
  int i;
  static char formbuf[128], normbuf[128];
  List *parts = list_create(LIST_SINGLE);

  *formbuf = *normbuf = '\0';
  for (i = 0; i < mlp->matches_used; ++i)
    {
      /* Should we be discriminating about which match
         of matches[i].matches[] we are using for this? */
      struct f2 *lform = mlp->matches[i].matching_f2s[0];
      struct f2 *clone = mb_new(xc->sigs->mb_f2s);
      /* This is a shallow clone; we only need it so we can
         set the flags locally */
      /* NOTE(review): the clone is filled in but never used -- the
         list entry and the flag below both go to the shared lform.
         Confirm whether `clone' was meant to be used instead. */
      *clone = *lform;
      list_add(parts, lform);
      if (strcmp(lastw,mlp->matches[i].lp->ref))
        sif_append_word(formbuf, sizeof formbuf, (const char *)lform->form);
      else
        BIT_SET(lform->flags,F2_FLAGS_SAME_REF);
      lastw = mlp->matches[i].lp->ref;
      if (lform->norm)
        sif_append_word(normbuf, sizeof normbuf, (const char *)lform->norm);
    }
  mlp->matches[0].psu_form->form = (unsigned char*)formbuf;
  mlp->matches[0].psu_form->norm = (unsigned char*)normbuf;
  mlp->matches[0].psu_form->file = (unsigned char*)mlp->matches[0].lp->f->file;
  mlp->matches[0].psu_form->lnum = mlp->matches[0].lp->f->lnum;
  mlp->matches[0].psu_form->parts = (struct f2**)list2array(parts);
  list_free(parts, NULL);
}
Beispiel #4
0
/* Parse the COF head into the top-level f2, then parse tails into
   the f2->parts array, and NULL-terminate the array

   LP is split in place: starting at AMPAMP, every "&&" is overwritten
   with a NUL so that LP becomes a sequence of separate strings.  AMPAMP
   is expected to point at the first "&&" inside LP; if it is NULL, LP
   is treated as a head with no tails.

   The head is marked F2_FLAGS_COF_HEAD and each tail F2_FLAGS_COF_TAIL;
   head and tails all get the address of F2P as their cof_id.  The parts
   array and the tail f2s are allocated from SCP.  PSU_SENSE is not used
   here. */
void
f2_parse_cof(const Uchar *file, size_t line, Uchar *lp, struct f2 *f2p, 
             Uchar **psu_sense, Uchar *ampamp, struct sig_context *scp)
{
  List *cofs = list_create(LIST_SINGLE);
  Uchar *form = NULL;
  int i = 0;

  while (1)
    {
      list_add(cofs, lp);
      if (!ampamp)
        break;
      *ampamp = '\0';
      ampamp += 2;
      lp = ampamp;
      ampamp = (unsigned char *)strstr((char*)lp,"&&");
    }
  /* one slot per list entry: the head takes none, so this leaves room
     for every tail plus the terminating NULL */
  f2p->parts = mb_new_array(scp->mb_f2ps, list_len(cofs));

  /* Parse the head in the top-level f2 structure */
  form = list_first(cofs);
  f2_parse(file,line,form,f2p,NULL,NULL);
  f2p->cof_id = (uintptr_t)f2p;
  BIT_SET(f2p->flags, F2_FLAGS_COF_HEAD);

  /* Now parse the tails into the parts array */
  for (i = 0, form = list_next(cofs); 
       form; 
       form = list_next(cofs), ++i)
    {
      f2p->parts[i] = mb_new(scp->mb_f2s);
      f2_parse(file,line,form,f2p->parts[i],NULL,NULL);
      f2p->parts[i]->cof_id = (uintptr_t)f2p;
      BIT_SET(f2p->parts[i]->flags, F2_FLAGS_COF_TAIL);
    }
  f2p->parts[i] = NULL;

  /* The list was only scaffolding and used to be leaked.  Its elements
     point into LP and are not owned by the list, so no element
     destructor is passed (presumably what a NULL second argument
     means -- confirm against list_free). */
  list_free(cofs, NULL);
}
Beispiel #5
0
/* Attach a fresh cache sigset to SP.

   The shared sigs_cache_pool is created on first use and its reference
   count is bumped on every call.  The cache is allocated from SP's
   owner, inherits SP's project, lang, core and owner, is given the
   file name "cache", gets its own forms and norms hashes, has all its
   other containers set to NULL, and is marked as loaded. */
void
sigs_cache_init(struct sigset *sp)
{
  struct sigset *cache;

#if 0
  /* This is old cache handling; these days we autoload missing languages and we have per-lang
     cache rather than using sp->file == "cache" */
  if (!sp->file)
    return;
  if (!strcmp((const char *)sp->file, "cache"))
    {
      fprintf(stderr, "sigs_cache: can't cache a cache\n");
      return;
    }
#endif

  /* lazily create the pool shared by all caches */
  if (sigs_cache_pool == NULL)
    {
      sigs_cache_pool = npool_init();
      sigs_cache_pool_refs = 0;
    }
  sigs_cache_pool_refs += 1;

  cache = mb_new(sp->owner->mb_sigsets);
  sp->cache = cache;

  /* identity inherited from the sigset being cached */
  cache->project = sp->project;
  cache->lang = sp->lang;
  cache->core = sp->core;
  cache->file = (const unsigned char *)"cache";

  /* a cache has no backing file contents */
  cache->fmem = NULL;
  cache->lines = NULL;

  /* lookup tables that the cache fills in */
  cache->forms = hash_create(1);
  cache->norms = hash_create(1);

  /* everything else starts out empty */
  cache->bigrams_hash = NULL;
  cache->mdsets_hash = NULL;
  cache->cache = NULL;
  cache->bigrams = NULL;
  cache->psus = NULL;

  cache->owner = sp->owner;
  cache->loaded = 1;
}
Beispiel #6
0
/* Register a note tag for the current line and set the note mark text
   on PARENT.

   TAG     the tag used in the text, or NULL to generate one: the
           numeric value of the last tag in notes_in_line plus one, or
           "1" when there is no previous note or its tag is not a
           positive number
   PARENT  the node that carries the mark; if it already has children
           the data of its first child is replaced, otherwise a new
           text node is appended

   Returns the tag that was registered, or NULL (after a warning) if
   the tag is already used in this line.  A generated tag is copied
   into note_pool: the returned pointer is stored in the note, so it
   must not point at a buffer that a later call overwrites. */
const unsigned char *
note_register_tag(const unsigned char *tag, struct node *parent)
{
  if (!tag)
    {
      struct note *last_np = notes_in_line ? list_last(notes_in_line) : NULL;
      int m = last_np ? atoi((char*)last_np->tag) : 0;

      if (m > 0)
        {
          char buf[16]; /* room for any int plus the terminator */
          snprintf(buf, sizeof buf, "%d", m+1);
          tag = npool_copy((unsigned char *)buf, note_pool);
        }
      else
        /* this is a stop-gap; it means that alpha notes can be done
           explicitly, but they'll get mixed with numeric marks if
           no mark is used in a #note: */
        tag = (const unsigned char *)"1";
    }

  if (note_find_in_line(tag))
    {
      vwarning("note tag %s is used more than once in this line", tag);
      return NULL;
    }
  else
    {
      struct note *np = mb_new(mb);
      unsigned char *note_mark_text = NULL;
      struct node *note_mark_node = parent;
      if (note_index < 1000000)
        {
          unsigned char markbuf[16];
          snprintf((char*)markbuf,sizeof markbuf,"%d",note_index++);
          note_mark_text = npool_copy(markbuf, note_pool);
        }
      /* If there was a ^1^ tag in the line we need to replace the text
         content of the parent element here; otherwise, we have a fresh
         parent element and just need to append the text node */
      if (note_mark_node->children.lastused)
        ((struct node*)(note_mark_node->children.nodes[0]))->data = note_mark_text;
      else
        appendChild(note_mark_node, textNode(note_mark_text));
      np->tag = tag;
      np->mark = note_mark_text;
      np->node = note_mark_node;
      np->status = NOTE_REGISTERED;
      if (notes_in_line)
        list_add(notes_in_line, np);
      /* list_add(notes_in_text, np); */
      return tag;
    }
}
Beispiel #7
0
/* caller should now resolve word_id against word_form_index before
   calling and pass the result as form arg if non-NULL; NULL arg means
   form is embedded in lemma

   Parse the lemmatization held in MASTER_FORMP->literal.

   - A NULL XC gives a warning; a NULL MASTER_FORMP is silently ignored.
   - If the form has no literal, a lemma node is created for it with
     the form attached, and nothing is parsed.
   - If the form has no language yet, the literal must start with
     "%LANG:" (a "-..." suffix before the ':' is skipped); LANG becomes
     the form's language.  If the form already has a language, a
     leading "%...:" prefix is skipped.
   - The rest is split on '&' (outer loop); the first piece uses
     MASTER_FORMP itself, later pieces get new forms chained through
     ->multi and are linked as COF tails of the first.
   - Each piece is split on '|' (inner loop); the first alternative is
     parsed into the piece's own form, later ones into new forms
     chained through MASTER_FORMP->ambig.

   The global `phase' is set to "lem" while parsing and reset to NULL
   on the early error returns. */
void
ilem_parse(struct xcl_context *xc, struct ilem_form *master_formp)
{
  unsigned char *lem;
  int newflag = 0;
  extern const char *phase;
  unsigned char *lemma = NULL;
#define LANGBUF_LEN 32
  char langbuf[LANGBUF_LEN+1];

  struct xcl_l *master_lp = NULL;

  if (!xc)
    {
      vwarning("internal error: ilem_parse called with NULL args");
      return;
    }
  if (!master_formp)
    {
      /* this can happen after ATF parse errors */
      return;
    }

  phase = "lem";

  if (master_formp->literal)
    {
      lemma = npool_copy((unsigned char *)master_formp->literal, xc->pool);
    }
  else
    {
      struct xcl_l*lp = xcl_lemma(xc,NULL,master_formp->ref,NULL,NULL,0);
      lp->lnum = master_formp->lnum;
      lp->f = master_formp;
      lp->inst = make_inst(xc,lp->f);
      phase = NULL;
      return;
    }

  if (NULL == master_formp->f2.lang)
    {
      if ('%' == *lemma)
        {
          char *langbufp = langbuf;
          /* stop at the end of the string as well as at ':' or '-' so
             a malformed prefix cannot make us read past the literal */
          for (++lemma; *lemma && *lemma != ':' && *lemma != '-'; )
            {
              if (langbufp - langbuf == LANGBUF_LEN)
                {
                  langbuf[LANGBUF_LEN] = '\0';
                  vwarning2(file,lnum,"[91]: lang starting with '%s' is too long (MAX %d)",langbuf,LANGBUF_LEN);
                  phase = NULL;
                  return;
                }
              else
                *langbufp++ = *lemma++;
            }
          /* langbuf is used as a C string below, so terminate it */
          *langbufp = '\0';
          if ('-' == *lemma)
            {
              while (*lemma && ':' != *lemma)
                ++lemma;
            }
          if (!*lemma)
            {
              vwarning2(file,lnum,"[92]: lang starting with '%s' has no ':'",langbuf);
              phase = NULL;
              return;
            }
          /* NOTE(review): lemma is left pointing at the ':' here,
             whereas the branch below steps past it -- confirm that
             this is what lem_init expects. */
        }
      else
        {
          vwarning2(file,lnum,"[96]: no lang set for form");
          phase = NULL;
          return;
        }
      master_formp->f2.lang = npool_copy((unsigned char *)langbuf,xc->pool);
      master_formp->f2.core = langcore_of(langbuf);
    }
  else if ('%' == *lemma && '%' != lemma[1])
    {
      while (*lemma && ':' != *lemma)
        ++lemma;
      if (':' != *lemma)
        {
          vwarning2(file,lnum,"lang has no ':'");
          phase = NULL;
          return;
        }
      ++lemma;
    }

  /* In L1 this routine also had to handle lems with a form prepended
     and separated by * (not = , because that conflicts with = in ASCII
     macron).  This is no longer the case in L2 */

  /* Now we know that lemma points to the start of the lemmatization */
  lem_init((const unsigned char *)lemma);

  /* This outer loop splits on '&' */
  while (1)
    {
      struct xcl_l*lp;
      int alt_count = 0;
      int iflags = 0;
      struct ilem_form *curr_f = NULL;
      unsigned char *lem_src;

      lem = lem_next(xc);
      if (!lem)
        break;
      /* remember the piece for the diagnostic below: ilem_para_parse
         sets lem to NULL when it fails */
      lem_src = lem;

      lp = xcl_lemma(xc,NULL,master_formp->ref,NULL,NULL,0);
      lp->inst = master_formp->literal;
      lp->lnum = lnum;
      lp->ante_para = ilem_para_parse(xc, lem,&lem,master_formp->lnum, ilem_para_pos_ante);
      if (lem)
        {
          unsigned char *post = NULL;
          while (isspace(*lem))
            ++lem;
          post = lem_end(lem);
          lp->post_para = ilem_para_parse(xc, post,NULL,master_formp->lnum, ilem_para_pos_post);
          if (isspace(*post))
            {
              while (post > lem && isspace(post[-1]))
                --post;
              *post = '\0';
            }
          ilem_para_boundaries(lp,xc);
        }
      else
        {
          vwarning2(file,master_formp->lnum,"[96]: lem `%s' failed syntax stripping",lem_src);
          break;
        }

      alt_init(lem);

      if (master_formp->mcount)
        {
          struct ilem_form *mrover = NULL;
          lp->f = mb_new(xc->sigs->mb_ilem_forms);
          lp->f->newflag = newflag;
          lp->f->f2.lang = master_formp->f2.lang;
          lp->f->f2.core = master_formp->f2.core;
          lp->f->mcount = -1;
          /* NOTE(review): master_lp is only set by the else branch
             below; if mcount is already non-zero on the first pass it
             is still NULL here -- confirm that cannot happen. */
          if (master_formp->mcount == 1)
            {
              master_formp->type = "cof-head";
              master_lp->cof_tails = list_create(LIST_SINGLE);
            }
          lp->f->type = "cof-tail";
          lp->cof_head = master_lp;
          list_add(lp->cof_head->cof_tails, lp);

          ++master_formp->mcount;
          /* efficiency doesn't matter here as we will have relatively 
             few of these */
          for (mrover = master_formp; mrover->multi; mrover = mrover->multi)
            ;
          mrover->multi = lp->f;
          /*lp->f->master = master_formp;*/
          lp->f->file = master_formp->file;
          lp->f->lnum = master_formp->lnum;
          lp->ref = lp->f->ref = master_formp->ref;
          lp->f->f2.form = master_formp->f2.form;
          lp->f->literal = NULL;
        }
      else
        {
          lp->f = master_formp;
          lp->f->mcount = 1;
          lp->f->newflag = newflag;
          lp->ref = lp->f->ref;
          lp->f->type = NULL;
          master_lp = lp;
        }

      lp->f->instance_flags = iflags;

      /* This inner loop splits on '|'; it is where each lemma is actually
         handled */
      while (1)
        {
          lem = alt_next(xc);
          if (!lem)
            break;
          iflags = 0;

          /* strip leading instance-flag characters */
          while (lem_iflags[*lem])
            {
              switch (*lem)
                {
                case '+':
                  ++lem;
                  /*newflag = !ignore_plus; */
                  BIT_SET(iflags, F2_FLAGS_LEM_NEW);
                  break;
                case '!':
                  ++lem;
                  BIT_SET(iflags, F2_FLAGS_PSU_STOP);
                  break;
                case '-':
                  ++lem;
                  BIT_SET(iflags, F2_FLAGS_PSU_SKIP);
                  break;
                case '`':
                  lem = (unsigned char *)"X";
                  break;
                }
            }

          if (bootstrap_mode && !BIT_ISSET(iflags, F2_FLAGS_LEM_NEW))
            BIT_SET(iflags, F2_FLAGS_LEM_NEW);

          if (BIT_ISSET(iflags,F2_FLAGS_LEM_NEW))
            {
              /* put a single '+' back in front of the lem */
              size_t tmplen = strlen((char *)lem) + 2;
              char *tmp = malloc(tmplen);
              if (!tmp)
                {
                  vwarning2(file,lnum,"ilem_parse: out of memory");
                  phase = NULL;
                  return;
                }
              snprintf(tmp, tmplen, "+%s", (char *)lem);
              lem = npool_copy((unsigned char *)tmp, xc->pool);
              free(tmp);
            }

          if (alt_count++)
            {
              struct ilem_form *last_alt = NULL, *f = NULL;
              if (!lem)
                break;

              /*f->f2 = NULL form_allocator();*/
              f = mb_new(xc->sigs->mb_ilem_forms);
              /* f->newflag = newflag; */
              lp->f->ref = master_formp->ref;
              f->f2.lang = master_formp->f2.lang;
              f->f2.core = master_formp->f2.core;
              f->f2.form = master_formp->f2.form;
              if (BIT_ISSET(iflags, F2_FLAGS_LEM_NEW))
                {
                  BIT_SET(f->f2.flags, F2_FLAGS_LEM_NEW);
                  if ('+' == *lem) /* should always be true */
                    ++lem;
                }
              f->lnum = master_formp->lnum;
              f->file = master_formp->file;
              f->instance_flags = iflags;
              f->sublem = (char*)npool_copy(lem,xc->pool);

              /* link this into the master_formp */
              for (last_alt = master_formp; 
                   last_alt->ambig; 
                   last_alt = last_alt->ambig)
                ;
              curr_f = last_alt->ambig = f;
            }
          else
            {
              lp->f->sublem = (char*)npool_copy(lem,xc->pool);
              curr_f = lp->f;
              if (BIT_ISSET(iflags, F2_FLAGS_LEM_NEW))
                {
                  BIT_SET(curr_f->f2.flags, F2_FLAGS_LEM_NEW);
                  if ('+' == *lem) /* should always be true */
                    ++lem;
                }
            }

          /* Instance parsing cannot result in a form with && being
             processed using f2_parse_cof, so we can just pass a NULL
             final argument */
          f2_parse((Uchar*)lp->f->file, lp->f->lnum, lem, &curr_f->f2, 
                   (Uchar**)&curr_f->psu_sense, NULL);

          if (check_cf((char*)lp->f->file, lp->f->lnum, (char*)curr_f->f2.cf))
            BIT_SET(curr_f->f2.flags, F2_FLAGS_INVALID);

          if (curr_f->lang)
            {
              curr_lang = curr_f->lang;
              if (!BIT_ISSET(curr_f->f2.flags,F2_FLAGS_CF_QUOTED))
                curr_f->f2.cf = ilem_conv(lp,curr_f->f2.cf);
              curr_f->f2.norm = ilem_conv(lp,curr_f->f2.norm);
              curr_f->f2.base = ilem_conv(lp,curr_f->f2.base);
              curr_f->f2.cont = ilem_conv(lp,curr_f->f2.cont);
            }
          curr_f->sublem = make_inst(xc,curr_f);
        }
    }
}
Beispiel #8
0
void
links_psu(struct xcl_context *xc, struct ML *mlp)
{
  struct linkset *lsp;
  int i;
  lsp = new_linkset(xc->linkbase,"psu",mlp->matches[0].psu);
  if (mlp->matches[0].psu_form)
    {
      struct xcl_l*lp = calloc(1,sizeof(struct xcl_l));
      struct f2 *parsed_psu = mb_new(xc->sigs->mb_f2s);
      set_instance_fields(xc,mlp);
      /* PSU's don't use the && COF notation, so NULL final arg
	 is safe here */
      f2_parse((unsigned char*)mlp->matches[0].lp->xc->file,
	       mlp->matches[0].lp->f->lnum,
	       npool_copy((unsigned char*)mlp->matches[0].psu,xc->pool), 
	       parsed_psu,
	       NULL, NULL);
      
      mlp->matches[0].psu_form->file = (unsigned char *)mlp->matches[0].lp->xc->file;
      mlp->matches[0].psu_form->lnum = mlp->matches[0].lp->f->lnum;
      mlp->matches[0].psu_form->cf = parsed_psu->cf;
      mlp->matches[0].psu_form->gw = parsed_psu->gw;
      if (mlp->matches[0].lp->f->psu_sense)
	mlp->matches[0].psu_form->sense = (unsigned char*)mlp->matches[0].lp->f->psu_sense;
      else if (parsed_psu->sense)
	mlp->matches[0].psu_form->sense = parsed_psu->sense;
      if (parsed_psu->pos)
	mlp->matches[0].psu_form->pos = parsed_psu->pos;
      if (parsed_psu->epos)
	mlp->matches[0].psu_form->epos = parsed_psu->epos;
	

      lp->parent = xc->root; /* fake this */
      lsp->form = mlp->matches[0].psu_form;
      lsp->form->file = (unsigned char*)xc->file;
      lsp->form->lnum = mlp->matches[0].lp->lnum;

      lsp->form->sig = f2_psu_sig(mlp->matches[0].psu_form,
				  xc->pool);
      lp->inst = psu_inst((char*)lsp->form->sig);
      lp->f = calloc(1,sizeof(struct ilem_form));
      lp->f->file = (char*)mlp->matches[0].psu_form->file;
      lp->f->lnum = mlp->matches[0].psu_form->lnum;
      lp->f->f2 = *mlp->matches[0].psu_form;
      if (psus_sig_check)
	sigs_l_check(xc, lp);
      mlp->matches[0].psu_nfinds = lp->f->fcount;
      /* WATCHME: should I be using psu_finds and reporting ambig here? 
       * For now, just use the first sig.
       */
      if (lp->f->fcount > 0)
	lsp->form->sig = lp->f->finds[0]->f2.sig;
      /* can't free this now because it may be referenced via the cache */
      /* free(lp->f); */
      free(lp);
      /* clear the newflag so it doesn't carry over to further occurrences of
	 this psu_form */
      /*mlp->matches[0].psu_form->newflag = 0;*/
      if (psus_sig_check)
	{
	  if (!mlp->matches[0].psu_nfinds)  /*NB: NO AMBIGUITY YET*/
	    {
	      struct f2 *e = mlp->matches[0].psu_form;
	      vwarning2((const char *)e->file, e->lnum, 
			"psu: %s[%s]%s: compound not found",
			e->cf,e->gw,e->pos);
	    }
	  else if (verbose)
	    {
	      struct f2 *e = mlp->matches[0].psu_form;
	      vwarning2((const char *)e->file, e->lnum, 
			"psu: %s[%s]%s found OK",
			e->cf,e->gw,e->pos);
	    }
	}
    }
  preallocate_links(lsp,mlp->matches_used);
  lsp->used = mlp->matches_used;
  for (i = 0; i < mlp->matches_used; ++i)
    {
      
      lsp->links[i].role = "elt";
      lsp->links[i].title = (const char *)mlp->matches[i].lp->f->f2.cf;
      lsp->links[i].lp = mlp->matches[i].lp;
      lsp->links[i].lref = mlp->matches[i].lp->xml_id;

      /* WATCHME: this is a bit lazy; but at initial implementation it
	 is not possible for an lp to be part of more than one PSU */
      mlp->matches[i].lp->psurefs = lsp->xml_id;
      mlp->matches[i].lp->f->is_part = 1;

      /* Delete finds which are not PSU matches */
      if (mlp->matches[i].nmatches < mlp->matches[i].lp->f->fcount)
	{
	  memcpy(mlp->matches[i].lp->f->finds, mlp->matches[i].matching_f2s, mlp->matches[i].nmatches * sizeof(struct f2*));
	  mlp->matches[i].lp->f->finds[mlp->matches[i].nmatches] = NULL;
	  mlp->matches[i].lp->f->fcount = mlp->matches[i].nmatches;
	}
    }
}
Beispiel #9
0
/* Expat end-element handler for XCL documents.

   USERDATA is the struct xcl_context being built.  NAME is the element
   name with the namespace URI in front, separated by EXPAT_NS_CHAR.
   Elements outside the XCL namespace (including names that carry no
   namespace at all) only discard the collected character data.

   Handled XCL elements:
     m    (only while next_k is set) store the character data in
          curr_meta under the key next_k
     c    close the current chunk
     l    outside an ll: create the lemma node from the curr_* values
          and parse its signature into a new ilem_form
     ll   reset ll_type and in_ll
     psu  trim the character data and append it to the PSU list for
          psu_lang, creating the list on first use */
void
xcl_eH(void *userData, const char *name)
{
  static const char xcl_ns[] = "http://oracc.org/ns/xcl/1.0";
  const size_t xcl_ns_len = sizeof xcl_ns - 1;
  struct xcl_context *xcp = userData;
  const char *vbar = strchr(name,EXPAT_NS_CHAR);

  /* vbar is NULL when the name has no namespace part; the namespace
     must match in full, not merely be a prefix of the XCL URI */
  if (vbar
      && (size_t)(vbar - name) == xcl_ns_len
      && !strncmp(xcl_ns,name,xcl_ns_len))
    {
      ++vbar;
      if (next_k && !strcmp(vbar,"m"))
        {
          hash_add(curr_meta,
                   npool_copy((unsigned char *)next_k,xcp->pool),
                   npool_copy((unsigned char *)charData_retrieve(),xcp->pool));
          next_k = NULL;
        }
      else if (!strcmp(vbar,"c"))
        xcl_chunk_end(xcp);
      else if (!strcmp(vbar,"l"))
        {
          if (!in_ll)
            {
              struct xcl_l *lp = xcl_lemma(xcp,
                                           curr_xml_id,
                                           curr_ref,
                                           curr_form,
                                           NULL, ll_type);
              lp->inst = curr_inst;
              lp->sig = npool_copy(curr_sig,xcp->pool);
              lp->lnum = curr_lnum;
              lp->f = mb_new(xcp->sigs->mb_ilem_forms);
              lp->f->ref = (char*)npool_copy((unsigned char *)curr_ref, xcp->pool);
              /* FIXME: this is not good enough for COF and PSU */
              lp->f->f2.sig = lp->sig;
              /* f2_parse gets its own copy of the signature; lp->sig
                 keeps the copy made above */
              f2_parse((unsigned char *)xcp->file, lp->lnum, npool_copy((unsigned char*)curr_sig,xcp->pool), &lp->f->f2, NULL, xcp->sigs);
            }
        }
      else if (!strcmp(vbar,"ll"))
        {
          ll_type = ll_none;
          in_ll = 0;
        }
      else if (!strcmp(vbar,"psu"))
        {
          unsigned char *tmp = (unsigned char*)charData_retrieve(), *etmp;
          List *lp;
          while (isspace(*tmp))
            ++tmp;
          etmp = tmp+strlen((char*)tmp);
          /* never step back past tmp: the text may be empty or all
             whitespace */
          while (etmp > tmp && isspace(etmp[-1]))
            --etmp;
          *etmp = '\0';
          if (!(lp = hash_find(xcp->psus,(unsigned char*)psu_lang)))
            {
              lp = list_create(LIST_SINGLE);
              hash_add(xcp->psus,npool_copy((unsigned char*)psu_lang,xcp->pool),lp);
            }
          list_add(lp,npool_copy(tmp,xcp->pool));
        }
    }
  else
    charData_discard();
}