Example #1
0
/*
 * Add the first signature of SIGS to the cache of IFP's sigset.
 *
 *   ifp->sp         the sigset whose cache is extended
 *   ifp->sp->owner  passed to the loader as the owning context; it is
 *                   expected to be non-NULL (not checked here)
 *   sigs[0]->sig    the signature that is loaded into the cache
 *
 * The call is a no-op when IFP or its sigset is missing, when the form
 * was itself produced from the cache (F2_FLAGS_FROM_CACHE), or when
 * SIGS is NULL/empty.  The cache is created on first use.
 */
void
sigs_cache_add(struct ilem_form *ifp, struct sig const *const *sigs)
{
  if (!ifp || !ifp->sp || BIT_ISSET(ifp->f2.flags, F2_FLAGS_FROM_CACHE))
    return;

  /* protects the sigs[0]->sig dereference below */
  if (!sigs || !sigs[0])
    return;

  if (!ifp->sp->cache)
    sigs_cache_init(ifp->sp);

  if (verbose)
    fprintf(stderr, "sigs_cache: adding %s to cache\n", ifp->f2.form);

  sigs_load_one_sig(ifp->sp->owner, ifp->sp->cache, sigs[0]->sig, 0, ifp);
}
Example #2
0
File: est.c Project: oracc/oracc
/*
 * Allocate and set up an est for PROJECT/INDEX.
 *
 * The est gets its own hash table and string pool; PROJECT, INDEX and
 * the result of se_file(project, index, "keys.est") are copied into
 * that pool.
 *
 * Returns the new est, or NULL when the structure cannot be allocated.
 */
struct est *
est_init(const char *project, const char *index)
{
  struct est *estp = malloc(sizeof *estp);

  if (!estp)
    return NULL;

  estp->h = hash_create(1000);
  estp->p = npool_init();
  estp->project = (const char*)npool_copy((const unsigned char *)project, estp->p);
  estp->index = (const char*)npool_copy((const unsigned char *)index, estp->p);
  estp->filename = (const char*)npool_copy((const unsigned char *)
                                           se_file(project, index, "keys.est"),
                                           estp->p);
  return estp;
}
Example #3
0
/*
 * Return a pool copy of the next '|'-separated alternative and advance
 * the file-level cursor lem_next_alt past it (NULL once exhausted).
 *
 * A '|' only separates alternatives when it is not the first byte,
 * is not preceded by one of  \ . - } (  and is followed by a byte
 * other than NUL . - { ).  The separator is overwritten with NUL.
 */
static unsigned char *
alt_next(struct xcl_context *xc)
{
  unsigned char *start = lem_next_alt;
  unsigned char *scan;

  if (!start)
    return NULL;

  for (scan = start; *scan; ++scan)
    {
      unsigned char prev, next;

      if (*scan != '|' || scan == start)
        continue;

      prev = scan[-1];
      next = scan[1];
      if (prev == '\\' || prev == '.' || prev == '-'
          || prev == '}' || prev == '(')
        continue;
      if (next == '\0' || next == '.' || next == '-'
          || next == '{' || next == ')')
        continue;

      /* real separator: cut here and leave the cursor on the next alt */
      *scan++ = '\0';
      break;
    }

  lem_next_alt = (*scan == '\0') ? NULL : scan;
  return npool_copy(start, xc->pool);
}
Example #4
0
/*
 * Element-end handler: collects catalogue values into xmd_vals.
 *
 * Outside catalogue data the only effect is to switch collection on
 * when an "images" element ends.  While collecting, the end of "cat"
 * switches collection off; any other element except "subfield" is
 * stored as name => accumulated character data (both pool copies).
 * Storing breaks if a key can occur more than once.
 */
static void
xmd_eH(void *userData, const char *name)
{
  if (!in_cat_data)
    {
      if (strcmp(name,"images") == 0)
        in_cat_data = 1;
      return;
    }

  if (strcmp(name,"cat") == 0)
    {
      in_cat_data = 0;
      return;
    }

  /* FIXME: should do something with subfields */
  if (strcmp(name,"subfield") != 0)
    hash_add(xmd_vals,
             (unsigned char*)npool_copy(name, xmd_pool),
             (unsigned char *)npool_copy(charData_retrieve(), xmd_pool));
}
Example #5
0
/*
 * Try to turn S into a unit token.
 *
 * S is trimmed with nsa_trim_morph and looked up in the context's
 * step_index.  Without a match T is returned unchanged.  With a match
 * a new NSA_T_UNIT token is returned whose unit carries the (possibly
 * continuation-extended) name and candidates; when T is non-NULL the
 * tokens gathered in the working list become the unit's children.
 */
struct nsa_token *
create_unit(struct nsa_parser *p,const char *s,struct nsa_token *t)
{
  const char *unit_name = nsa_trim_morph(p->context, s);
  struct nsa_hash_data *entry = hash_find(p->context->step_index, (unsigned char *)unit_name);
  struct nsa_token *unit_tok;
  struct nsa_unit *unit;
  List *members;

  if (!entry)
    return t;

  unit_tok = new_token();
  unit = new_unit();
  members = list_create(LIST_SINGLE);

  list_add(members,t);

  /* continuations may replace the entry, the name and extend the list */
  if (entry->continuations)
    entry = check_continuations(entry,p,&unit_name,members);

  unit->name = (char *)npool_copy((unsigned char *)unit_name,p->pool);
  unit->cands = entry->cands;
  unit_tok->type = NSA_T_UNIT;

  if (t)
    {
      struct nsa_token *member;
      int slot = 0;

      unit_tok->children = new_children(list_len(members));
      member = list_first(members);
      while (member)
        {
          unit_tok->children[slot] = member;
          ++slot;
          member = list_next(members);
        }
    }

  unit_tok->d.u = unit;
  list_free(members,NULL);
  return unit_tok;
}
Example #6
0
/*
 * Return a pool copy of the next '&'-separated lemma and advance the
 * file-level cursor lem_next_lem past it (NULL once exhausted).
 * Returns NULL when there is nothing left to split.
 *
 * An '&' separates lemmata unless it is preceded by one of  \ + - .
 * An '&' in the very first position has no preceding byte and is
 * treated as an unescaped separator (yielding an empty lemma) rather
 * than reading in front of the buffer.
 *
 * When the separator is preceded by '.', the byte before that '.' is
 * saved in post_lem_sentence and overwritten with NUL; this is only
 * done when that byte actually lies inside the current lemma.
 */
static unsigned char *
lem_next(struct xcl_context *xc)
{
  unsigned char *this_lem = lem_next_lem;

  if (!this_lem)
    return NULL;

  while (*lem_next_lem)
    {
      if ('&' == *lem_next_lem
          && (lem_next_lem == this_lem
              || (lem_next_lem[-1] != '\\'
                  && lem_next_lem[-1] != '+' && lem_next_lem[-1] != '-')))
        {
          unsigned char *amp = lem_next_lem;

          *lem_next_lem++ = '\0';
          /* need two bytes before '&' for the ".&" sentence marker */
          if (amp - this_lem >= 2 && amp[-1] == '.')
            {
              post_lem_sentence = amp[-2];
              amp[-2] = '\0';
            }
          break;
        }
      ++lem_next_lem;
    }

  if (*lem_next_lem == '\0')
    lem_next_lem = NULL;
  return npool_copy(this_lem, xc->pool);
}
Example #7
0
/*
  Replace fp->form by its alias, if the sig context has an alias table.

  ref_fp is the f2 we use as the CFGWPOS source; fp is the f2 that is being
  processed by the lemmer; if ref_fp is NULL fp is used as ref_fp as well.

  When the alias differs from the current form, the old form is kept in
  fp->oform and fp->form becomes a pool copy of the alias.

  Returns 1 if the form was changed, 0 otherwise (including when there
  is no alias table or the alias lookup yields nothing).
 */
int
f2_alias(struct sig_context *scp, struct f2 *fp, struct f2 *ref_fp)
{
  unsigned char *aform = NULL;
  int ret = 0;

  if (!scp->aliases)
    return 0;

  if (!ref_fp)
    ref_fp = fp;

  aform = sas_alias_form(scp->aliases,
                         fp->form,
                         ref_fp->cf,
                         ref_fp->gw,
                         ref_fp->pos);

  /* no result from the alias lookup: nothing to compare or free */
  if (!aform)
    return 0;

  if (strcmp((char*)fp->form,(char*)aform))
    {
      fp->oform = fp->form;
      fp->form = npool_copy(aform,scp->pool);
      ret = 1;
      if (verbose > 1)
        fprintf(stderr,"aliased form %s => fp->form %s\n",fp->oform,fp->form);
    }

  /* the alias string is released here; fp->form holds a pool copy */
  free(aform);
  return ret;
}
Example #8
0
/*
 * Attach LEMMA (as a pool copy) to the form registered under the
 * xml:id of WP in word_form_index; warn if no such form exists.
 */
void
lem_save_lemma(struct node *wp, const char *lemma)
{
  struct ilem_form *entry = hash_find(word_form_index, getAttr(wp,"xml:id"));

  if (!entry)
    {
      vwarning("internal error: word_form_index lookup failed");
      return;
    }

  entry->literal = (char*)npool_copy((unsigned char *)lemma,lemline_xcp->pool);
}
Example #9
0
/*
 * Append a new lem_save holding a pool copy of LP to the end of the
 * ->next chain that starts at curr_lsp, and remember it in last_lsp.
 */
void
lem_append_line(unsigned char *lp)
{
  struct lem_save *tail = curr_lsp;
  struct lem_save *fresh;

  while (tail->next)
    tail = tail->next;

  fresh = new_lsp();
  tail->next = fresh;
  last_lsp = fresh;
  fresh->line = npool_copy(lp,lemline_xcp->pool);
}
Example #10
0
/*
 * Build the instance string "%LANG:FORM=SUBLEM" for IFP (SUBLEM is
 * omitted when unset) and return a copy allocated from xc->pool.
 *
 * The string is assembled in a fixed 1024-byte buffer; snprintf keeps
 * an over-long form or sublem from writing past it (the result is
 * truncated instead).
 */
static char *
make_inst(struct xcl_context *xc, struct ilem_form *ifp)
{
  char buf[1024];

  snprintf(buf, sizeof buf, "%%%s:%s=%s",
           ifp->f2.lang, ifp->f2.form,
           ifp->sublem ? (const char *)ifp->sublem : "");
  return (char*)npool_copy((unsigned char*)buf,xc->pool);
}
Example #11
0
/*
 * Append a new lem_save holding a pool copy of LP to the end of the
 * ->cont chain that starts at curr_lsp.
 */
void
lem_save_cont(unsigned char *lp)
{
  struct lem_save *tail = curr_lsp;
  struct lem_save *fresh;

  while (tail->cont)
    tail = tail->cont;

  fresh = new_lsp();
  tail->cont = fresh;
  fresh->line = npool_copy(lp,lemline_xcp->pool);
}
Example #12
0
/*
 * Replace the FORM of the word registered under REF in
 * word_form_index with a pool copy of FORM.  Unknown refs are
 * silently ignored.
 */
void
lem_reset_form(const char *ref, const char *form)
{
  struct ilem_form *fp = hash_find(word_form_index,(unsigned char *)ref);

  if (fp)
    fp->f2.form = npool_copy((unsigned char *)form,lemline_xcp->pool);
}
Example #13
0
/*
 * Build the signature string for FP and return a copy allocated from
 * POOL, or NULL if FP is NULL or any component signature cannot be
 * produced.
 *
 * For a simple form the signature is sig_one(fp, 0).  For a form with
 * parts it is that head signature followed by sig_one(part, 1) for
 * each part, joined with "&&"; each part's signature is also stored in
 * part->tail_sig (and is therefore not freed here).
 *
 * The temporary head signature and the joined string are released on
 * every path.
 * NOTE(review): the List container created for the join is never
 * released in this function - confirm whether that is intended.
 */
unsigned char *
f2_sig(struct f2 *fp, struct npool*pool)
{
    unsigned char *ret = NULL;
    unsigned char *head = NULL;

    if (!fp)
        return NULL;

    head = sig_one(fp, 0);
    if (!head)
        return NULL;

    if (fp->parts)
    {
        List *parts = list_create(LIST_SINGLE);
        unsigned char *joined = NULL;
        int i;

        list_add(parts, head);
        for (i = 0; fp->parts[i]; ++i)
        {
            unsigned char *tail = sig_one(fp->parts[i], 1);

            fp->parts[i]->tail_sig = tail;
            if (!tail)
            {
                free(head);
                return NULL;
            }
            list_add(parts, tail);
        }

        joined = list_to_str2(parts, "&&");
        if (joined)
            ret = npool_copy(joined,pool);
        free(joined);
    }
    else
    {
        ret = npool_copy(head,pool);
    }

    free(head);
    return ret;
}
Example #14
0
/*
 * Return count_base(M) formatted as a decimal string (pool copy) when
 * M is an NSA_T_COUNT token; otherwise return the empty string.
 */
static const char *
countbase_str(struct nsa_token *m, struct nsa_parser *p)
{
  char digits[128];

  if (m->type != NSA_T_COUNT)
    return "";

  sprintf(digits,"%d",count_base(m));
  return (const char *)npool_copy((unsigned char *)digits,p->pool);
}
Example #15
0
/*
 * Build the PSU signature for FP and return a copy allocated from POOL.
 *
 * The result is "{NGRAM}::" followed by one piece per part, separated
 * by "++".  The piece for a part is
 *   - its tail_sig when set; otherwise
 *   - its sig (computed with f2_sig on demand) up to, but excluding,
 *     the first "&&"; or, when the sig has no "&&", tabless(sig).
 * A part for which no sig can be produced contributes no piece.
 *
 * The string is assembled in a fixed 1024-byte buffer; every append is
 * bounded, so over-long input is truncated instead of overrunning it.
 */
unsigned char *
f2_psu_sig(struct f2 *fp, struct npool *pool)
{
    unsigned char buf[1024];
    char *out = (char *)buf;

    snprintf(out, sizeof buf, "{%s}::", fp->psu_ngram);

    if (fp->parts)
    {
        int i;
        for (i = 0; fp->parts[i]; ++i)
        {
            const char *piece = NULL;
            size_t piece_len = 0;
            size_t room = 0;

            if (i)
                strncat(out, "++", sizeof buf - strlen(out) - 1);

            if (fp->parts[i]->tail_sig)
            {
                piece = (const char *)fp->parts[i]->tail_sig;
                piece_len = strlen(piece);
            }
            else
            {
                const char *amp = NULL;

                if (!fp->parts[i]->sig)
                    fp->parts[i]->sig = f2_sig(fp->parts[i], pool);

                /* f2_sig can fail; there is nothing to append then */
                if (!fp->parts[i]->sig)
                    continue;

                piece = (const char *)fp->parts[i]->sig;
                if ((amp = strstr(piece, "&&")))
                    piece_len = (size_t)(amp - piece);
                else
                {
                    piece = tabless(fp->parts[i]->sig);
                    piece_len = strlen(piece);
                }
            }

            /* strncat always NUL-terminates; keep one byte for that */
            room = sizeof buf - strlen(out) - 1;
            strncat(out, piece, piece_len < room ? piece_len : room);
        }
    }
    return npool_copy(buf,pool);
}
Example #16
0
/*
 * Increment the int counter stored under V in H, creating it with the
 * value 1 (keyed by a sig_pool copy of V) on first sight.
 *
 * If the counter cannot be allocated the value is left uncounted.
 */
static void
incr_val(Hash_table *h, const unsigned char *v)
{
  int *counter = hash_find(h, v);

  if (counter)
    {
      *counter += 1;
      return;
    }

  /* the counter is an int, so size it from the pointee, not the pointer */
  counter = malloc(sizeof *counter);
  if (!counter)
    return;

  *counter = 1;
  hash_add(h, npool_copy(v, sig_pool), counter);
}
Example #17
0
void
proj_init(struct run_context *runp, const char *project)
{
  struct proj_context *p = hash_find(runp->known_projects, (unsigned char *)project);
  if (!p)
    {
      const char *o = NULL;
      p = calloc(1,sizeof(struct proj_context));
      p->name = (char *)npool_copy((unsigned char *)project, runp->pool);
      hash_add(runp->known_projects,
	       npool_copy((unsigned char *)project,runp->pool),
	       p);
      p->xpd = xpd_init(project,runp->pool);
      p->owner = runp;
      if (xpd_option(p->xpd,"atf-saa-mode"))
	saa_mode = xpd_option_int(p->xpd,"atf-saa-mode");
      o = xpd_option(p->xpd,"render-serial");
      if (o && !strcmp(o, "yes"))
	odt_serial = 1;
      /*set_project(p, project);*/
    }
  runp->proj = p;
}
Example #18
0
/*
 * If f2_form_signs accepts the pair (fp->form, ref_fp->form), replace
 * fp->form by a pool copy of ref_fp->form, keeping the previous form
 * in fp->oform.
 *
 * Returns 1 when the form was replaced, 0 otherwise.
 */
int
f2_extreme_alias(struct sig_context *scp, struct f2 *fp, struct f2 *ref_fp)
{
  if (f2_form_signs(fp->form, ref_fp->form))
    {
      fp->oform = fp->form;
      fp->form = npool_copy(ref_fp->form,scp->pool);

      if (verbose > 1)
        fprintf(stderr,"extreme aliased form %s => fp->form %s\n",fp->oform,fp->form);

      return 1;
    }

  return 0;
}
Example #19
0
/*
 * Run STR through natf2utf for the language of L's form and return the
 * result: STR itself when the conversion changes nothing (or STR is
 * NULL), otherwise a copy of the converted text from the xcl pool.
 *
 * curr_lang is set to the form's language, and chartrie error
 * reporting is suppressed for the duration of the conversion and then
 * restored to its previous setting.
 */
static const unsigned char *
ilem_conv(struct xcl_l *l, const unsigned char *str)
{
  const unsigned char *converted = NULL;
  int saved_suppress;

  if (!str)
    return str;

  saved_suppress = chartrie_suppress_errors;
  curr_lang = l->f->lang;
  chartrie_suppress_errors = 1;

  converted = natf2utf((char*)str,(char*)str+strlen((char*)str),0,l->xc->file,l->lnum);
  if (strcmp((char*)converted,(char*)str) != 0)
    str = npool_copy(converted,l->xc->pool);

  chartrie_suppress_errors = saved_suppress;
  return str;
}
Example #20
0
/* This routine should not set anything but FORM at the f2 level;
   that is the job of ilem_parse */
void
lem_save_form(const char *ref, const char *lang, 
	      const char *formstr, struct lang_context *langcon)
{
  struct ilem_form *form = mb_new(lemline_xcp->sigs->mb_ilem_forms);
  extern int curr_cell;
  form->ref = (char*)ref;
  if (lang)
    {
      form->f2.lang = (unsigned char*)lang;
      form->f2.core = langcore_of(lang);
      if (strstr(lang,"949"))
	  BIT_SET(form->f2.flags,F2_FLAGS_LEM_BY_NORM);
    }
  if (BIT_ISSET(form->f2.flags,F2_FLAGS_LEM_BY_NORM))
    {
      form->f2.norm = (unsigned char *)formstr;
      form->f2.form = (const unsigned char *)"*";
    }
  else
    form->f2.form = (unsigned char *)formstr;
  form->file = (char*)file;
  form->lnum = lnum;
  form->lang = langcon;

  if (!ref[0])
    return;

  if (!curr_lsp->forms_alloced
      || curr_lsp->forms_used == curr_lsp->forms_alloced)
    {
      curr_lsp->forms_alloced += 16;
      curr_lsp->forms = realloc(curr_lsp->forms,
				curr_lsp->forms_alloced*sizeof(struct ilem_form*));
      curr_lsp->cells = realloc(curr_lsp->cells,
				curr_lsp->forms_alloced*sizeof(int));
      if (curr_lsp->forms_used < 0)
	curr_lsp->forms_used = 0;
    }
  /* when curr_cell = 0 we are in a line with no cells; by definition,
     all content in such a line is in cell 2 (because cell 1 is the line
     number) */
  curr_lsp->cells[curr_lsp->forms_used] = (curr_cell ? curr_cell : 2);
  curr_lsp->forms[curr_lsp->forms_used++] = form;
  hash_add(word_form_index,npool_copy((unsigned char*)ref,lemline_xcp->pool),form);
}
Example #21
0
/*
 * Append a token for S to the parser's token list.
 *
 * NSA_P_STOP produces a bare NSA_T_STOP token.  Any other type
 * produces an overt NSA_T_GRAPHEME token whose text reference records
 * TYPE and, for LEMM/LITERAL/LINK, the REF pointer.
 *
 * A pool copy of S that contains '(' and either starts with a digit or
 * is exactly "n(" / "N(" up to the bracket is split in place into
 * number (before the bracket) and unit (between the brackets).
 */
void
nsa_token(struct nsa_parser *p, enum nsa_ptypes type, void *ref, const char *s)
{
  struct nsa_token *tok = new_token();
  unsigned char *text, *paren;

  if (type == NSA_P_STOP)
    {
      tok->type = NSA_T_STOP;
      list_add(p->toks,tok);
      return;
    }

  text = npool_copy((const unsigned char *)s,p->pool);
  tok->type = NSA_T_GRAPHEME;
  grapheme(tok) = new_grapheme();
  grapheme_overt(tok) = 1;
  grapheme_text_ref(tok) = new_text_ref();
  grapheme_text_ref(tok)->ptype = type;

  if (type == NSA_P_LEMM)
    grapheme_text_ref(tok)->t.lemmptr = ref;
  else if (type == NSA_P_LITERAL)
    grapheme_text_ref(tok)->t.literal = ref;
  else if (type == NSA_P_LINK)
    grapheme_text_ref(tok)->t.linkptr = ref;

  paren = (unsigned char *)strchr((const char *)text,'(');
  if (paren
      && (isdigit(*text)
          || ((paren-text)==1 && (*text == 'n' || *text == 'N'))))
    {
      grapheme_num(tok) = (char *)text;
      *paren++ = '\0';
      grapheme_unit(tok) = (char *)paren;
      /* cut the unit at the closing bracket, or at end of string */
      while (*paren && ')' != *paren)
        ++paren;
      *paren = '\0';
    }

  list_add(p->toks,tok);
}
Example #22
0
/*
 * Create an xcl context for TEXT, fill it from RUN and the file-level
 * project/textid/file settings, give it a fresh sig context, run
 * process() on the text and return the context.
 *
 * The language list from texttag_langs() is copied into the context
 * pool and the original is freed.
 */
struct xcl_context *
xcl_process(struct run_context *run, struct node *text)
{
  struct xcl_context *ctx = xcl_create();
  char *lang_list = texttag_langs();

  ctx->run = run;
  ctx->root = NULL;
  ctx->curr = NULL;

  ctx->langs = (char*)npool_copy((unsigned char*)lang_list,ctx->pool);
  free(lang_list);

  ctx->project = project;
  ctx->textid = textid;
  ctx->file = file;
  ctx->sigs = sig_context_init();

  process(ctx,text);
  return ctx;
}
Example #23
0
/*
 * Register a note tag for the current line and attach its mark text to
 * PARENT.
 *
 * With a NULL TAG a default is chosen: one more than the last tag in
 * notes_in_line when that tag is a positive number, otherwise "1".
 * (This is a stop-gap: alpha notes can be done explicitly, but they
 * get mixed with numeric marks if no mark is used in a #note:.)
 *
 * The tag is stored as a copy in note_pool, so callers may pass
 * temporary buffers.
 *
 * Returns the stored tag, or NULL (after a warning) when the tag is
 * already used in this line.
 */
const unsigned char *
note_register_tag(const unsigned char *tag, struct node *parent)
{
  if (!tag)
    {
      struct note *last_np = NULL;

      if (notes_in_line)
        last_np = list_last(notes_in_line);

      if (last_np)
        {
          int m = atoi((char*)last_np->tag);
          if (m > 0)
            {
              /* local scratch is enough: the tag is copied on registration */
              char buf[16];
              snprintf(buf, sizeof buf, "%d", m+1);
              return note_register_tag((const unsigned char *)buf, parent);
            }
        }
      return note_register_tag((const unsigned char *)"1", parent);
    }

  if (note_find_in_line(tag))
    {
      vwarning("note tag %s is used more than once in this line", tag);
      return NULL;
    }
  else
    {
      struct note *np = mb_new(mb);
      unsigned char *note_mark_text = NULL;
      struct node *note_mark_node = parent;

      /* NOTE(review): once note_index reaches 1000000 the mark text
         stays NULL and is still passed on below - confirm intent */
      if (note_index < 1000000)
        {
          unsigned char markbuf[8];
          snprintf((char*)markbuf, sizeof markbuf, "%d", note_index++);
          note_mark_text = npool_copy(markbuf, note_pool);
        }

      /* If there was a ^1^ tag in the line we need to replace the text
         content of the parent element here; otherwise, we have a fresh
         parent element and just need to append the text node */
      if (note_mark_node->children.lastused)
        ((struct node*)(note_mark_node->children.nodes[0]))->data = note_mark_text;
      else
        appendChild(note_mark_node, textNode(note_mark_text));

      /* keep our own copy: TAG may point at a caller's scratch buffer */
      np->tag = npool_copy(tag, note_pool);
      np->mark = note_mark_text;
      np->node = note_mark_node;
      np->status = NOTE_REGISTERED;
      if (notes_in_line)
        list_add(notes_in_line, np);
      /* list_add(notes_in_text, np); */
      return np->tag;
    }
}
Example #24
0
/* Parse a "#note:" line of a transliteration and append the resulting
   note-text element to PARENT.

   parent node is "current" node in block.c

   lines[0] is advanced past the 6-character prefix, an optional ^tag^
   and an optional @notelabel{...}; the remaining text (and any
   continuation lines) is read with scan_comment_sub.

   A tagged note must match a tag already registered in the preceding
   line.  An untagged note gets tag "1" and a freshly created mark
   element attached at note_attach_point(parent).

   Returns the number of lines consumed by the note text, or 1 when the
   note is rejected with a warning before its text is scanned. */
int
note_parse_tlit(struct node *parent, int current_level, unsigned char **lines)
{
  int nlines = 1; /* also the result when no note text is scanned */
  struct node *n;
  char tagbuf[8], *m = tagbuf;
  int tag_too_long = 0;
  unsigned char *notelabel = NULL, *notetext = NULL;
  const unsigned char *tag = NULL, *mark = NULL;

  *tagbuf = '\0';
  lines[0] += 6;
  while (isspace(lines[0][0]))
    ++lines[0];

  if ('^' == lines[0][0])
    {
      struct note *np;
      /* the note should already be registered at the tag-point in the line */
      ++lines[0];
      while (lines[0][0] && '^' != lines[0][0])
        {
          /* never write past tagbuf; remember that the tag did not fit */
          if (m < tagbuf + sizeof(tagbuf) - 1)
            *m++ = lines[0][0];
          else
            tag_too_long = 1;
          ++lines[0];
        }
      *m = '\0';
      /* step over the closing '^' only if it is really there */
      if ('^' == lines[0][0])
        ++lines[0];
      if (tag_too_long)
        {
          warning("note tag is too long");
          return 1;
        }
      tag = (const unsigned char *)tagbuf;
      np = note_find_in_line(tag);
      if (np)
        {
          mark = np->mark;
        }
      else
        {
          warning("tag in note does not have corresponding tag in preceding line");
          return 1;
        }
    }
  else
    {
      if (list_len(notes_in_line))
        {
          warning("tagged notes cannot be mixed with untagged ones");
          return 1;
        }
      else
        {
          struct node *lastC = note_attach_point(parent);

          /* If there is no note tag we have to do two things: fix the attach point and set the tag to "1" */
          if (lastC)
            {
              struct node *xmark = NULL;
              enum e_type e;
              enum block_levels l;
              switch (lastC->etype)
                {
                case e_l:
                  {
                    struct node *lastCchild = lastChild(lastC);
                    if (lastCchild && lastCchild->etype == e_c)
                      {
                        /* the attach point is either the cell or its
                           child field if there is one */
                        struct node *cField = lastChild(lastCchild);
                        if (cField && cField->etype == e_f)
                          lastC = cField;
                        else
                          lastC = lastCchild;
                        l = WORD;
                      }
                    else if (lastCchild && lastCchild->etype == e_f)
                      {
                        /* the attach point is the field */
                        lastC = lastCchild;
                        l = WORD;
                      }
                    else
                      l = LINE;
                    e = e_g_nonw;
                  }
                  break;
                case e_composite:
                case e_score:
                case e_transliteration:
                  l = TEXT;
                  e = e_note_link;
                  break;
                case e_object:
                  l = OBJECT;
                  e = e_note_link;
                  break;
                case e_surface:
                  l = SURFACE;
                  e = e_note_link;
                  break;
                case e_column:
                  l = COLUMN;
                  e = e_note_link;
                  break;
                  /* FIXME: THIS CAN'T BE RIGHT */
                case e_variant:
                  l = LINE;
                  e = e_note_link;
                  break;
                default:
                  /* no element type or level is known for this parent,
                     so no mark element can be built */
                  vwarning("unhandled note parent %s", lastC->names[0].pname);
                  return 1;
                }
              xmark = elem(e,NULL,lnum,l);
              if (e == e_g_nonw)
                appendAttr(xmark, attr(a_type, (unsigned char *)"notelink"));
              appendChild(lastC, xmark);
              tag = (const unsigned char *)"1";
              mark = note_register_tag(tag, xmark);
            }
          else
            {
              warning("nowhere to attach note mark to; please provide context and mark");
              tag = NULL;
            }
        }
    }

  if (tag)
    {
      while (isspace(lines[0][0]))
        ++lines[0];
      if (!strncmp((char*)lines[0],"@notelabel{", 11))
        {
          lines[0] += 11;
          notelabel = lines[0];
          while (lines[0][0] && lines[0][0] != '}')
            ++lines[0];
          /* an unterminated label would otherwise run off the line */
          if (lines[0][0] != '}')
            {
              warning("@notelabel{ is missing its closing '}'");
              return 1;
            }
          lines[0][0] = '\0';
          ++lines[0];
          while (isspace(lines[0][0]))
            ++lines[0];
        }

      n = elem(e_note_text,NULL,lnum,current_level);
      appendAttr(n, attr(a_note_mark, mark));
      /* NOTE(review): for a ^tag^ note TAG points into the local tagbuf;
         confirm note_register_note does not keep the pointer */
      note_register_note(tag, n);

      if (notelabel)
        set_or_append_attr(n,a_note_label,"notelabel",notelabel);

      /* This is a bit weird, but the last character before the content is
         either a space after #note:, or a space or the closer character
         after a note mark or label, so we are safe to play this trick
         with the scan_comment routine */
      --lines[0];
      lines[0][0] = '#';
      notetext = npool_copy(scan_comment_sub(lines,&nlines,0), note_pool);
      (void)trans_inline(n,notetext,NULL,0);
      appendChild(parent,n);
    }

  return nlines;
}
Example #25
0
/* Parse the signature/lemmatization string LP in place into F2P.

   FILE and LINE are used for diagnostics only. LP is modified
   destructively: field separators are overwritten with NUL and most
   f2p members are set to point into LP (a few are replaced by copies
   from f2_pool near the end). If PSU_SENSE is non-NULL and the part
   after '[' contains "+=", *PSU_SENSE is set to the text following
   it. A string containing "&&" is handed to f2_parse_cof together
   with SCP instead of being parsed here. The globals phase and
   with_textid are changed for the duration of the call and restored
   before returning.

   return non-negative on success; -1 on error; 
   non-negative is the length of string parsed by f2_parse.

   NOTE(review): a NULL LP returns 1, and the "parse error at" path
   increments ret instead of setting it to -1, so that error is
   reported with a non-negative return value.
 */
int
f2_parse(const Uchar *file, size_t line, Uchar *lp, struct f2 *f2p, Uchar **psu_sense, struct sig_context *scp)
{
  Uchar *tmp = NULL, *err_lp = NULL,
    *disambig = NULL, *ampamp = NULL, 
    *orig_lp = lp, field = '\0', *psu_tmp = NULL;
  int ret = 0;
  const char *saved_phase = phase;
  int square, saved_with_textid = with_textid;

  if (!lp)
    return 1;
  /* keep an unmodified copy of the input for use in warnings, because
     lp itself is cut up below; it is freed at the ret: label */
  /* err_lp = npool_copy(lp, scp->pool); */
  err_lp = (Uchar*)strdup((char*)lp);

  phase = "f2";
  with_textid = 0;

  /* skip the old shadow lem codes */
  if (*lp == '`')
    {
      vwarning2((char*)file,line,"%s: please remove deprecated shadow lem sequence '`' or '`?'",err_lp);      
      lp += 1 + (lp[1] == '?');
    }

  if ((ampamp = (unsigned char*)strstr((char*)lp, "&&")))
    {
      f2_parse_cof(file, line, lp, f2p, psu_sense, ampamp, scp);
      goto ret;
    }

  /* if the sig starts with @ parse the admin fields @PROJ%LANG:FORM=
   * first.
   *
   * N.B.: % and : are not recognized by field_end() as this causes
   * problems parsing morphology.
   */
  if ('@' == *lp)
    {
      f2p->project = lp+1;
      lp = (Uchar*)strchr((char*)lp,'%');
      if (lp)
	{
	  *lp++ = '\0';
	  f2p->lang = lp;
	  f2p->core = langcore_of((const char*)lp);
	  lp = (Uchar*)strchr((char*)lp,':');
	  if (lp)
	    {
	      *lp++ = '\0';
	      f2p->form = lp;
	      /* NOTE(review): the result of this strchr is not checked, so
		 a missing '=' is dereferenced as NULL on the next line */
	      lp = (Uchar*)strchr((char*)lp,'=');
	      *lp++ = '\0';
	    }
	}
      /* NOTE(review): if '%' or ':' was missing, lp is NULL here and is
	 dereferenced by the '[' test below */
    }

  if ('[' == *lp)
    {
      /* FIXME: this needs to be more rigorous and check for CF-legal char in initial position */
      vwarning2((char*)file,line,"%s: lemmatization cannot begin with '['",err_lp);
      goto ret;
    }

  /* no '[' at all: there is no CF[GW]; GW defaults to "X", a leading
     'n' or 'u' is taken as the POS, and parsing resumes at pos_parse */
  if (!strchr((const char *)lp,'['))
    {
      /* f2p->cf = "X"; */
      f2p->gw = (unsigned char *)"X";
      if (*lp == 'n')
	{
	  f2p->pos = "n";
	  ++lp;
	}
      else if (*lp == 'u')
	{
	  f2p->pos = "u";
	  ++lp;
	}
      goto pos_parse;
    }

  /* parse the CF[GW/SENSE]POS'EPOS which are constant: */
  f2p->cf = lp;
  if (*lp == '"')
    {
      ++lp;
      f2p->cf = lp; /* don't include quotes in the CF; 
		       WATCHME: what happens in post-cache
		       retrieval parse?
		     */
      BIT_SET(f2p->flags,F2_FLAGS_CF_QUOTED);
      /*  fp->explicit |= NEW_CF; */ 
      /* have to do something here, i.e., suppress charset translation */
    }

  /* advance to the first '[' that is not preceded by a backslash */
  while (*lp && (*lp != '[' || lp[-1] == '\\'))
    ++lp;

  if (BIT_ISSET(f2p->flags,F2_FLAGS_CF_QUOTED))
    {
      if (lp[-1] == '"')
	lp[-1] = '\0';
      else
	{
	  vwarning2((char*)file,line,"%s: '\"' missing on quoted CF",err_lp);
	  ret = -1;
	}
    }

  /* NOTE(review): lp has already been dereferenced above, so this test
     cannot be false here */
  if (lp)
    *lp = '\0';
  else
    goto ret;

  /* a CF ending in an unescaped ')' carries a restrictor: CF(RESTRICTOR) */
  if (lp[-1] == ')' && lp[-2] != '\\')
    {
      char *oparen = strchr((char*)f2p->cf,'(');
      if (oparen && oparen[-1] != '\\')
	{
	  *oparen++ = '\0';
	  f2p->restrictor = (unsigned char*)oparen;
	  lp[-1] = '\0';
	}
      else
	{
	  vwarning2((char*)file,line,"%s: '(' missing on restrictor",err_lp);
	  ret = -1;
	  goto ret;
	}
    }
  else if ((tmp = (unsigned char*)strchr((char*)f2p->cf,'(')) && tmp[-1] != '\\')
    {
      vwarning2((char*)file,line,"%s: ')' missing on restrictor",err_lp);
      ret = -1;
      goto ret;
    }

  f2p->gw = ++lp;
  if ((psu_tmp = (Uchar *)strstr(cc(lp),"+=")))
    {
      *psu_tmp = '\0';
      psu_tmp += 2;
      if (psu_sense)
	*psu_sense = psu_tmp;
      psu_tmp = (Uchar *)strchr((const char *)psu_tmp,']');
    }

  /* make SENSE optional here to support inline lem parsing */
  square = 0;
  while (*lp && (*lp != '/' || lp[1] != '/'))
    {
      if (*lp == '[' && lp[-1] != '\\')
	++square;
      else if (*lp == ']' && lp[-1] != '\\')
	{
	  if (square)
	    --square;
	  else
	    break;
	}
      ++lp;
    }

  /* If we didn't find ] but had a psu_sense with +=,
     reset lp to the closing square bracket after the
     psu_sense */
  if (!*lp && psu_tmp)
    lp = psu_tmp;

  if (*lp)
    {
      /* "//" introduces the SENSE, which runs to the matching ']' */
      if ('/' == *lp)
	{
	  *lp++ = '\0';
	  ++lp;
	  f2p->sense = lp;
	  square = 0;
	  while (*lp)
	    {
	      if (*lp == '[' && lp[-1] != '\\')
		++square;
	      else if (*lp == ']' && lp[-1] != '\\')
		{
		  if (square)
		    --square;
		  else
		    break;
		}
	      ++lp;
	    }
	}
      if (*lp) /* lp is at closing square bracket of CF[GW] */
	{
	  *lp++ = '\0';
	  
	  /* This is either a POS or something
	     that starts with a field char */

	pos_parse:
	  if (isupper(*lp))
	    {
	      Uchar *end = NULL;
	      Uchar *epos = NULL;
	      for (end = lp; *end && !isspace(*end); ++end)
		;
	      epos = (Uchar*)strchr((const char *)lp,'\'');
	      f2p->pos = lp;
	      if (epos && epos < end)
		lp = epos;
	      else
		lp = field_end(lp);
	      /* a POS starting with 'V' may continue with "/t" or "/i" */
	      if (*f2p->pos == 'V' && '/' == *lp && (lp[1] == 't' || lp[1] == 'i'))
		{
		  ++lp;
		  epos = (Uchar*)strchr((const char *)lp,'\'');
		  if (epos && epos < end)
		    lp = epos;
		  else
		    lp = field_end(lp);
		}
	      field = *lp;
	      if (field == '\'')
		{
		  *lp++ = '\0';
		  f2p->epos = lp;
		  lp = field_end(lp);
		  if (*f2p->epos == 'V' && '/' == *lp && (lp[1] == 't' || lp[1] == 'i'))
		    {
		      ++lp;
		      lp = field_end(lp);
		    }
		  field = *lp;
		  *lp++ = '\0';
		}
	      else
		*lp++ = '\0';
	    }
	  else if (*lp == '\'')
	    {
	      *lp++ = '\0';
	      f2p->epos = lp;
	      lp = field_end(lp);
	      field = *lp;
	      *lp++ = '\0';
	    }
	  else if (*lp)
	    {
	      field = *lp;
	      *lp++ = '\0';
	    }

	  /* Now we are at a variable set of instance
	     fields; parse as though they can be in any
	     order, though in principle the order should
	     always be fixed. */
	  while (*lp)
	    {
	      /* field holds the introducer character that preceded lp */
	      switch (field)
		{
#if 0
		  /* this must follow POS and ' is no longer a field
		   * ender because of conflict with ' in M1
		   */
		case '\'':
		  f2p->epos = lp;
		  break;
#endif
		case '@':
		  f2p->project = lp;
		  break;
		case '%':
		  f2p->lang = lp;
		  break;
		case ':':
		  f2p->form = lp;
		  break;
		case '$':
		  if (!BIT_ISSET(f2p->flags, F2_FLAGS_LEM_BY_NORM))
		    f2p->norm = lp;
		  /* else ignore normalization because we got it from the "FORM" */
		  break;
		case '/':
		  f2p->base = lp;
		  break;
		case '+':
		  if (*lp == '-')
		    f2p->cont = lp;
		  else if (*lp == '.')
		    f2p->augment = lp;
		  else
		    vwarning2((char*)file,line,"%s: '+' in signature should be followed by '-' or '.'", err_lp);
		  ++lp;
		  break;
		case '#':
		  if (*lp == '#')
		    {
		      ++lp;
		      f2p->morph2 = lp;
		    }
		  else
		    f2p->morph = lp;
		  break;
		case '*':
		  f2p->stem = lp;
		  break;
		case '\\':
		  disambig = lp;
		  while (isalnum(*lp) || '\\' == *lp)
		    ++lp;
		  break;
		case '<':
		case ' ':
		case '\t':
		case 0:
		  goto break_switch_loop;
		default:
		  vwarning2((char*)file,line,"%s: parse error at '%c'", err_lp, field);
		  /* NOTE(review): ret becomes positive here, not -1, so the
		     final return does not report this as an error */
		  ++ret;
		  goto ret;
		}
	      lp = field_end(lp);
	      if (*lp)
		{
		  field = *lp;
		  *lp++ = '\0';
		}
	    }
	}
    }

 break_switch_loop:

  validate_pos((const char *)file, line, f2p->pos);
  validate_pos((const char *)file, line, f2p->epos);
  if (f2p->base)
    validate_base((const char *)file, line, f2p->base);

#if 0
  /* If lp is non-zero we didn't manage to parse the entire form: */
  if (*lp)
    {
      vwarning2((char*)file,line,"%s: bad tense designator: only allowed with verb POS",err_lp);
      ret = -1;
      goto ret;
    }
#endif

  /* field == '$' occurs when $ is the end of the lem, e.g., ana[to]PRP$ */
  /* FIXME: THIS IS A POOR TEST BECAUSE IT FAILS ON ]N$#M1 */
  if (field == '$')
    {
      if (!f2p->norm || !*f2p->norm)
	f2p->norm = f2p->cf;
    }
  
  if (BIT_ISSET(f2p->flags, F2_FLAGS_LEM_BY_NORM))
    {
      if (f2p->norm && f2p->cf && !strcmp((char*)f2p->cf,(char*)f2p->norm))
	{
	  BIT_SET(f2p->flags, F2_FLAGS_NORM_IS_CF);
	  f2p->cf = NULL;
	}
    }

  /* a backslash in the GW introduces an 'i' or 't' designator: the GW
     is replaced by a pool copy cut at the backslash and the designator
     is turned into an EPOS of "V/i" or "V/t" */
  if (f2p->gw)
    {
      char *bs = strchr((char*)f2p->gw, '\\');
      if (bs)
	{
	  unsigned char *gwtmp = npool_copy(f2p->gw, f2_pool);
	  bs = (char *)(gwtmp + (bs - (char*)f2p->gw));
	  *bs++ = '\0';
	  f2p->gw = gwtmp;
	  if (*bs == 'i' || *bs == 't')
	    {
	      if ((!f2p->pos || *f2p->pos == 'V')
		  && (!f2p->epos || *f2p->epos == 'V'))
		{
		  if (*bs == 'i')
		    f2p->epos = npool_copy((unsigned char *)"V/i",f2_pool);
		  else
		    f2p->epos = npool_copy((unsigned char *)"V/t",f2_pool);
		}
	      else
		{
		  vwarning2((char*)file,line,"%s: bad designator: only allowed with verb POS",err_lp);
		  ret = -1;
		  goto ret;
		}
	    }
	  else
	    {
	      vwarning2((char*)file,line,"%s: bad designator: only 'i' or 't' allowed",err_lp);
	      ret = -1;
	      goto ret;
	    }
	}
    }
 
  /* common exit: also reached by goto from the error paths above, so
     everything below runs for failed parses as well */
 ret:
  free(err_lp);
  /* '<' == *lp must be dealt with by caller */
  if (isspace(*lp))
    *lp++ = '\0';

  if (!f2p->gw || !*f2p->gw)
    {
      if (f2p->sense && *f2p->sense)
	f2p->gw = f2p->sense;
      else
	f2p->gw = (unsigned char *)"1";
    }

  clean_cf((char*)file, line, (unsigned char *)f2p->cf);

  clean_gw_sense((char*)file, line, (unsigned char *)f2p->gw);
  if (f2p->sense)
    clean_gw_sense((char*)file, line, (unsigned char *)f2p->sense);

  /* augment and disambiguator are folded into the FORM, which then
     becomes an f2_pool copy.
     NOTE(review): both sprintf calls below write into a fixed
     1024-byte buffer with no length check */
  if (f2p->augment)
    {
      char buf[1024];
      sprintf(buf,"%s%s%s",f2p->form,AUGMENT_STR,f2p->augment);
      f2p->form = npool_copy((unsigned char *)buf,f2_pool);
    }

  if (disambig)
    {
      char buf[1024];
      if (*disambig == *(DISAMBIG_STR))
	sprintf(buf,"%s%s",f2p->form,disambig);
      else
	sprintf(buf,"%s%s%s",f2p->form,DISAMBIG_STR,disambig);
      f2p->form = npool_copy((unsigned char *)buf,f2_pool);
    }

  /* a CF containing a space marks the entry as a PSU */
  if (f2p->cf && strchr((char*)f2p->cf,' '))
    BIT_SET(f2p->flags, F2_FLAGS_IS_PSU);
  phase = saved_phase;
  with_textid = saved_with_textid;

  return (ret < 0) ? -1 : (lp - orig_lp);
}
Example #26
0
File: est.c Project: oracc/oracc
/*
 * Store KEY in ESTP's hash table under its mangled form (keymangler
 * with estmangle).  Both the mangled key and KEY itself are copied
 * into the est's string pool.
 */
void
est_add(const unsigned char *key, struct est *estp)
{
  const unsigned char *mangled = keymangler(key, estmangle, NULL, 0, NULL,NULL);

  hash_add(estp->h,
           npool_copy(mangled, estp->p),
           npool_copy(key, estp->p));
}
Example #27
0
/* caller should now resolve word_id against word_form_index before
   calling and pass the result as form arg if non-NULL; NULL arg means
   form is embedded in lemma */
void
ilem_parse(struct xcl_context *xc, struct ilem_form *master_formp)
{
  /*
   * Parse the lemmatization literal carried by MASTER_FORMP and attach the
   * resulting instance data to XC.
   *
   * xc           -- parse context; its pool (xc->pool) receives all string
   *                 copies made here.  A NULL xc is reported and ignored.
   * master_formp -- the form whose ->literal is parsed.  NULL is silently
   *                 ignored (it can occur after ATF parse errors).
   *
   * Outline:
   *   1. With no literal, a single xcl_l is created for the form and the
   *      routine returns.
   *   2. Otherwise a leading "%lang:" prefix is consumed; when the form has
   *      no language yet, the prefix supplies f2.lang and f2.core.
   *   3. lem_next() yields successive pieces (split on '&'); each piece gets
   *      its own xcl_l.  The first piece reuses master_formp, later pieces
   *      get a new form chained through ->multi and flagged "cof-tail".
   *   4. alt_next() yields the alternatives within a piece (split on '|');
   *      the first fills the piece's form, later ones are chained through
   *      master_formp->ambig.
   *
   * The variable `phase' is set to "lem" while parsing and reset to NULL on
   * every early error return.  `file' and `lnum' used in the warnings are
   * defined outside this function.
   */
  unsigned char *lem;
  int newflag = 0;
  extern const char *phase;
  unsigned char *lemma = NULL;
#define LANGBUF_LEN 32
  char langbuf[LANGBUF_LEN+1];

#if 0
#define FORMBUF_LEN 128
  char formbuf[FORMBUF_LEN+1];
#endif

  struct xcl_l *master_lp = NULL;

  if (!xc)
    {
      vwarning("internal error: ilem_parse called with NULL args");
      return;
    }
  if (!master_formp)
    {
      /* this can happen after ATF parse errors */
      return;
    }

  phase = "lem";

  /*#define  lemma   (master_formp->literal)*/

  if (master_formp->literal)
    {
      /* Work on a pooled copy: the parsing below writes into the string */
      lemma = npool_copy((unsigned char *)master_formp->literal, xc->pool);
    }
  else
    {
      /* Nothing to parse: register the bare form and stop */
      struct xcl_l*lp = xcl_lemma(xc,NULL,master_formp->ref,NULL,NULL,0);
      lp->lnum = master_formp->lnum;
      lp->f = master_formp;
      lp->inst = make_inst(xc,lp->f);
      phase = NULL;
      return;
    }

  if (NULL == master_formp->f2.lang)
    {
      if ('%' == *lemma)
        {
          char *langbufp = langbuf;
          /* Copy the language code up to ':' or '-'.  Stopping at the
             terminator as well keeps the scan inside the lemma string
             when no delimiter is present. */
          for (++lemma; *lemma && *lemma != ':' && *lemma != '-'; )
            {
              if (langbufp - langbuf == LANGBUF_LEN)
                {
                  langbuf[LANGBUF_LEN] = '\0';
                  vwarning2(file,lnum,"[91]: lang starting with '%s' is too long (MAX %d)",langbuf,LANGBUF_LEN);
                  phase = NULL;
                  return;
                }
              else
                *langbufp++ = *lemma++;
            }
          /* langbuf is an uninitialized automatic array, so it must be
             terminated before it is used as a string below.  At most
             LANGBUF_LEN bytes were stored, so this write is in bounds. */
          *langbufp = '\0';
          if ('-' == *lemma)
            {
              /* skip the part after '-' up to the ':' */
              while (*lemma && ':' != *lemma)
                ++lemma;
            }
          if (!*lemma)
            {
              vwarning2(file,lnum,"[92]: lang starting with '%s' has no ':'",langbuf);
              phase = NULL;
              return;
            }
          /* NOTE(review): unlike the branch below, lemma is left pointing
             at the ':' here rather than past it -- confirm that lem_init()
             and friends expect that. */
        }
      else
        {
          vwarning2(file,lnum,"[96]: no lang set for form");
          phase = NULL;
          return;
        }
      master_formp->f2.lang = npool_copy((unsigned char *)langbuf,xc->pool);
      master_formp->f2.core = langcore_of(langbuf);
    }
  else if ('%' == *lemma && '%' != lemma[1])
    {
      /* The form already has a language: just step over the prefix */
      while (*lemma && ':' != *lemma)
        ++lemma;
      if (':' != *lemma)
        {
          vwarning2(file,lnum,"lang has no ':'");
          /* reset phase like every other early error return */
          phase = NULL;
          return;
        }
      ++lemma;
    }

#if 0
  /* In L1 this routine had to handle lems with a form prepended and separated
     by * (not = , because that conflicts with = in ASCII macron).  This is
     no longer the case in L2 */
  if (NULL == master_formp->f2.form)
    {
      char *formbufp = formbuf;
      while (*lemma != '*')
        {
          if (formbufp - formbuf == FORMBUF_LEN)
            {
              formbuf[10] = '\0';
              vwarning2(file,lnum,"[94]: form starting '%s' is too long (MAX %d)",formbuf,FORMBUF_LEN);
              phase = NULL;
              return;
            }
          *formbufp++ = *lemma++;
        }
      if ('*' != *lemma)
        {
          formbuf[10] = '\0';
          vwarning2(file,lnum,"[95]: form starting '%s' has no '*'",formbuf,FORMBUF_LEN);
          phase = NULL;
          return;
        }
      ++lemma;
    }
#endif

  /* Now we know that lemma points to the start of the lemmatization */
  lem_init((const unsigned char *)lemma);

  /* This outer loop splits on '&' */
  while (1)
    {
      struct xcl_l*lp;
      int alt_count = 0;
      int iflags = 0;
      struct ilem_form *curr_f = NULL;
      unsigned char *raw_lem = NULL;

      lem = lem_next(xc);
      if (!lem)
        break;

      /* ilem_para_parse() overwrites lem through its third argument; keep
         the incoming pointer so a failure can still be reported */
      raw_lem = lem;

      lp = xcl_lemma(xc,NULL,master_formp->ref,NULL,NULL,0);
      lp->inst = master_formp->literal;
      lp->lnum = lnum;
      lp->ante_para = ilem_para_parse(xc, lem,&lem,master_formp->lnum, ilem_para_pos_ante);
      if (lem)
        {
          unsigned char *post = NULL;
          while (isspace(*lem))
            ++lem;
          post = lem_end(lem);
          lp->post_para = ilem_para_parse(xc, post,NULL,master_formp->lnum, ilem_para_pos_post);
          if (isspace(*post))
            {
              /* trim trailing whitespace off the lemma proper */
              while (post > lem && isspace(post[-1]))
                --post;
              *post = '\0';
            }
          ilem_para_boundaries(lp,xc);
        }
      else
        {
          /* lem is NULL on this path, so report the saved pointer instead
             of handing NULL to %s */
          vwarning2(file,master_formp->lnum,"[96]: lem `%s' failed syntax stripping",raw_lem);
          break;
        }

      alt_init(lem);

      if (master_formp->mcount)
        {
          /* Second or later '&' piece: build a COF tail form */
          struct ilem_form *mrover = NULL;
          /*lp->f = NULL;*/ /* NEW ILEM_FORM  form_allocator();*/
          lp->f = mb_new(xc->sigs->mb_ilem_forms);
          lp->f->newflag = newflag;
          lp->f->f2.lang = master_formp->f2.lang;
          lp->f->f2.core = master_formp->f2.core;
          lp->f->mcount = -1;
          /* NOTE(review): master_lp is only assigned in the else branch
             below; this code relies on master_formp->mcount being 0 on the
             first pass -- confirm callers never pass a form with a
             nonzero mcount. */
          if (master_formp->mcount == 1)
            {
              master_formp->type = "cof-head";
              master_lp->cof_tails = list_create(LIST_SINGLE);
            }
          lp->f->type = "cof-tail";
          lp->cof_head = master_lp;
          list_add(lp->cof_head->cof_tails, lp);

          ++master_formp->mcount;
          /* efficiency doesn't matter here as we will have relatively 
             few of these */
          for (mrover = master_formp; mrover->multi; mrover = mrover->multi)
            ;
          mrover->multi = lp->f;
          /*lp->f->master = master_formp;*/
          lp->f->file = master_formp->file;
          lp->f->lnum = master_formp->lnum;
          lp->ref = lp->f->ref = master_formp->ref;
          lp->f->f2.form = master_formp->f2.form;
          lp->f->literal = NULL;
        }
      else
        {
          /* First piece: the master form itself carries the data */
          lp->f = master_formp;
          lp->f->mcount = 1;
          lp->f->newflag = newflag;
          lp->ref = lp->f->ref;
          lp->f->type = NULL;
          master_lp = lp;
        }

      lp->f->instance_flags = iflags;

      /* This inner loop splits on '|'; it is where each lemma is actually
         handled */
      while (1)
        {
          lem = alt_next(xc);
          if (!lem)
            break;
          iflags = 0;

          /* Consume leading instance-flag characters */
          while (lem_iflags[*lem])
            {
              switch (*lem)
                {
                case '+':
                  ++lem;
                  /*newflag = !ignore_plus; */
                  BIT_SET(iflags, F2_FLAGS_LEM_NEW);
                  break;
                case '!':
                  ++lem;
                  BIT_SET(iflags, F2_FLAGS_PSU_STOP);
                  break;
                case '-':
                  ++lem;
                  BIT_SET(iflags, F2_FLAGS_PSU_SKIP);
                  break;
                case '`':
                  lem = (unsigned char *)"X";
                  break;
                }
            }

          if (bootstrap_mode && !BIT_ISSET(iflags, F2_FLAGS_LEM_NEW))
            BIT_SET(iflags, F2_FLAGS_LEM_NEW);

          if (BIT_ISSET(iflags,F2_FLAGS_LEM_NEW))
            {
              /* Put a single '+' back in front of the lemma: room for the
                 '+', the text and the terminator */
              char *tmp = malloc(strlen((const char *)lem) + 2);
              if (!tmp)
                {
                  vwarning("ilem_parse: out of memory");
                  phase = NULL;
                  return;
                }
              sprintf(tmp, "+%s", lem);
              lem = npool_copy((unsigned char *)tmp, xc->pool);
              free(tmp);
            }

          if (alt_count++)
            {
              /* Second or later alternative: new form on the ambig chain */
              struct ilem_form *last_alt = NULL, *f = NULL;
              if (!lem)
                break;

              /*f->f2 = NULL form_allocator();*/
              f = mb_new(xc->sigs->mb_ilem_forms);
              /* f->newflag = newflag; */
              lp->f->ref = master_formp->ref;
              f->f2.lang = master_formp->f2.lang;
              f->f2.core = master_formp->f2.core;
              f->f2.form = master_formp->f2.form;
              if (BIT_ISSET(iflags, F2_FLAGS_LEM_NEW))
                {
                  BIT_SET(f->f2.flags, F2_FLAGS_LEM_NEW);
                  if ('+' == *lem) /* should always be true */
                    ++lem;
                }
              f->lnum = master_formp->lnum;
              f->file = master_formp->file;
              f->instance_flags = iflags;
              f->sublem = (char*)npool_copy(lem,xc->pool);

              /* link this into the master_formp */
              for (last_alt = master_formp; 
                   last_alt->ambig; 
                   last_alt = last_alt->ambig)
                ;
              curr_f = last_alt->ambig = f;
            }
          else
            {
              lp->f->sublem = (char*)npool_copy(lem,xc->pool);
              curr_f = lp->f;
              if (BIT_ISSET(iflags, F2_FLAGS_LEM_NEW))
                {
                  BIT_SET(curr_f->f2.flags, F2_FLAGS_LEM_NEW);
                  if ('+' == *lem) /* should always be true */
                    ++lem;
                }
            }

          /* Instance parsing cannot result in a form with && being
             processed using f2_parse_cof, so we can just pass a NULL
             final argument */
          f2_parse((Uchar*)lp->f->file, lp->f->lnum, lem, &curr_f->f2, 
                   (Uchar**)&curr_f->psu_sense, NULL);

          if (check_cf((char*)lp->f->file, lp->f->lnum, (char*)curr_f->f2.cf))
            BIT_SET(curr_f->f2.flags, F2_FLAGS_INVALID);

          if (curr_f->lang)
            {
              curr_lang = curr_f->lang;
              /* a quoted CF is left as written */
              if (!BIT_ISSET(curr_f->f2.flags,F2_FLAGS_CF_QUOTED))
                curr_f->f2.cf = ilem_conv(lp,curr_f->f2.cf);
              curr_f->f2.norm = ilem_conv(lp,curr_f->f2.norm);
              curr_f->f2.base = ilem_conv(lp,curr_f->f2.base);
              curr_f->f2.cont = ilem_conv(lp,curr_f->f2.cont);
            }
          /* replaces the provisional sublem stored above */
          curr_f->sublem = make_inst(xc,curr_f);
        }
    }
}
Example #28
0
static void
eH(void *userData, const char *name)
{
  static int defined = 1;
  if (!strcmp(name,"key"))
    {
      const unsigned char *k = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->syskeys,k))
	hash_add(context->syskeys,npool_copy(k,context->cpool),curr_system);
    }
  else if (!strcmp(name,"det"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->sysdets,d))
	hash_add(context->sysdets,npool_copy(d,context->cpool),curr_system);
    }
  else if (!strcmp(name,"och"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      List *l;
      if (!(l = hash_find(context->comheads,d)))
	{
	  l = list_create(LIST_SINGLE);
	  list_add(l,(void*)curr_comhead_sys);
	  hash_add(context->comheads,npool_copy(d,context->cpool),(void*)l);
	}
      else
	{
	  list_add(l,(void*)curr_comhead_sys);
	}
    }
  else if (!strcmp(name,"gal2"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->gal2_tokens,d))
	hash_add(context->gal2_tokens,npool_copy(d,context->cpool),&defined);
    }
  else if (!strcmp(name,"igi"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->igigal_keys,d))
	hash_add(context->igigal_keys,npool_copy(d,context->cpool),last_u);
    }
  else if (!strcmp(name,"la2"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->la2_tokens,d))
	hash_add(context->la2_tokens,npool_copy(d,context->cpool),&defined);
    }
  else if (!strcmp(name,"suffix"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->morph_suffixes,d))
	hash_add(context->morph_suffixes,npool_copy(d,context->cpool),&defined);
    }
  else if (!strcmp(name,"sexfrac"))
    {
      const unsigned char *d = (const unsigned char *)charData_retrieve();
      if (!hash_find(context->sexfracs,d))
	hash_add(context->sexfracs,npool_copy(d,context->cpool),&defined);
    }
}
Example #29
0
/*
 * Element-start callback (the signature suggests an expat-style handler --
 * TODO confirm).
 *
 * For the XTF `transliteration' and `composite' elements it records the
 * "project" attribute in curr_project and the XML id in curr_text_id.
 *
 * For every other element carrying a non-empty GDL `utf8' attribute it
 * counts one sign instance:
 *   - a value starting with a non-ASCII byte is rendered as dot-separated
 *     "xHHHHH" code points; any other value is used verbatim;
 *   - psl_hex_to_sign() maps that to a sign name, falling back to the hex
 *     string itself when it returns NULL;
 *   - "#count" is incremented in the per-sign table (signiary, key
 *     "hex:sign") and the per-text table (pertext, key
 *     "project:text:hex:sign"), creating the tables on first use;
 *   - when a non-empty "form" attribute is present it is counted in both
 *     tables and curr_sig_hash / curr_hash are reset to NULL.
 *
 * Input that cannot be converted, or whose keys would not fit the local
 * buffers, is reported on stderr and skipped.
 *
 * userData is unused.
 * NOTE(review): curr_project / curr_text_id are filled with strcpy(); their
 * capacity is not visible here -- confirm they are large enough.
 */
static void
sH(void *userData, const char *name, const char **atts)
{
  /* Plain strcmp() is enough: it never reads past name's terminator,
     whereas peeking at name[22] first does so for short element names. */
  if (!strcmp(name, "http://oracc.org/ns/xtf/1.0:transliteration")
      || !strcmp(name, "http://oracc.org/ns/xtf/1.0:composite"))
    {
      strcpy(curr_project, findAttr(atts,"project"));
      strcpy(curr_text_id, get_xml_id(atts));
    }
  else
    {
      const char *utf8 = findAttr(atts,"http://oracc.org/ns/gdl/1.0:utf8");
      if (*utf8)
        {
          static wchar_t wbuf[128];
          static size_t n, i;
          char sbuf[512], xbuf[1024], *hex;
          unsigned char *sn = NULL;
          const char *form = NULL;
          int slen, xlen;

          if (*(const unsigned char *)utf8 > 127)
            {
              size_t cap, used = 0;

              n = mbstowcs(wbuf,utf8,128);
              if (n == (size_t)-1)
                {
                  fprintf(stderr, "sH: invalid multibyte sequence in utf8 attribute; skipped\n");
                  return;
                }
              /* per code point: optional '.', 'x', up to 6 hex digits;
                 plus one byte for the terminator */
              cap = n * 8 + 1;
              hex = malloc(cap);
              if (!hex)
                {
                  fprintf(stderr, "sH: out of memory\n");
                  return;
                }
              *hex = '\0';
              for (i = 0; i < n; ++i)
                {
                  int w = snprintf(hex + used, cap - used, "%sx%05X",
                                   i ? "." : "", (unsigned int)wbuf[i]);
                  if (w < 0 || (size_t)w >= cap - used)
                    break;
                  used += (size_t)w;
                }
            }
          else
            {
              hex = strdup(utf8);
              if (!hex)
                {
                  fprintf(stderr, "sH: out of memory\n");
                  return;
                }
            }
          if (!(sn = psl_hex_to_sign(hex)))
            sn = (unsigned char *)hex;
          /* sn may alias hex, so build both keys before releasing it */
          slen = snprintf(sbuf,sizeof sbuf,"%s:%s",hex,sn);
          xlen = snprintf(xbuf,sizeof xbuf,"%s:%s:%s:%s",curr_project,curr_text_id,hex,sn);
          free(hex);
          if (slen < 0 || (size_t)slen >= sizeof sbuf
              || xlen < 0 || (size_t)xlen >= sizeof xbuf)
            {
              /* a truncated key would merge unrelated signs; skip instead */
              fprintf(stderr, "sH: sign key too long; skipped\n");
              return;
            }
          
          if (!(curr_sig_hash = hash_find(signiary,(unsigned char*)sbuf)))
            {
              curr_sig_hash = hash_create(1);
              hash_add(signiary,npool_copy((unsigned char*)sbuf,sig_pool),curr_sig_hash);
            }
          incr_val(curr_sig_hash, (const unsigned char *)"#count");
          ++total_sign_instances;

          if (!(curr_hash = hash_find(pertext,(unsigned char*)xbuf)))
            {
              curr_hash = hash_create(1);
              hash_add(pertext,npool_copy((unsigned char*)xbuf,sig_pool),curr_hash);
            }
          incr_val(curr_hash, (const unsigned char *)"#count");

          form = findAttr(atts, "form");
          if (form && *form)
            {
              incr_val(curr_sig_hash, (const unsigned char *)form);
              incr_val(curr_hash, (const unsigned char *)form);
              curr_sig_hash = curr_hash = NULL;
            }
        }
    }
}
Example #30
0
/*
 * Start a new saved-line record: obtain one from new_lsp(), make it both
 * the current and the last record (curr_lsp, last_lsp), and store in it a
 * copy of LP allocated from lemline_xcp->pool.
 */
void
lem_save_line(unsigned char *lp)
{
  curr_lsp = new_lsp();
  last_lsp = curr_lsp;
  curr_lsp->line = npool_copy(lp, lemline_xcp->pool);
}