Exemple #1
0
/*
 * Release the first `count` candidate strings stored in `result`, then the
 * array itself.  A NULL `result` is accepted and ignored.
 */
static void DpsSegmentFreeResults(dpsunicode_t **result, int count) {
  int k;

  if (result == NULL) return;
  for (k = 0; k < count; k++) {
    DPS_FREE(result[k]);
  }
  DPS_FREE(result);
}

/*
 * Split `line` into dictionary words and return the segmentation as a newly
 * allocated string in which every word is preceded by a single space.
 *
 * Candidate segmentations ("paths") are kept in four parallel arrays indexed
 * by a path id:
 *   position[id] - offset in `line` reached by the path,
 *   next[id]     - id of the following path in the working list (-1 = end),
 *   value[id]    - score of the path,
 *   result[id]   - text of the path built so far.
 * `top` is the head of the working list.  On every round the first path that
 * has not yet reached the end of the line is unlinked and extended by each
 * candidate word starting at its position; List->hash[c] gives the longest
 * length to try for a word starting with character c (low 16 bits).
 *
 * Side effects on List: a zero hash entry for a seen character is set to 1,
 * and a single character that is not in the list is added with freq 1.
 *
 * Returns:
 *   - a copy of `line` when List has no hash table, or when no path survives;
 *   - the segmented string otherwise;
 *   - NULL when an allocation fails.
 * The caller owns the returned string.
 */
static dpsunicode_t *DpsSegmentProcess(DPS_CHINALIST *List, dpsunicode_t *line) {
  int top, nextid = 0, *position = NULL, *next = NULL, len, maxid, i, current, father, needinsert, iindex;
  unsigned int h;
  double *value = NULL, p;
  dpsunicode_t **result = NULL;
  dpsunicode_t *otv = NULL, space[] = {32, 0};
  DPS_CHINAWORD *chinaword, chiw;

  if (List->hash == NULL) {
    return (dpsunicode_t*)DpsUniDup(line);
  }

  len = DpsUniLen(line);
  maxid = 2 * len + 1;
  position = (int*)DpsMalloc(maxid * sizeof(int));
  if (position == NULL) goto fail;
  next = (int*)DpsMalloc(maxid * sizeof(int));
  if (next == NULL) goto fail;
  value = (double*)DpsMalloc(maxid * sizeof(double));
  if (value == NULL) goto fail;
  result = (dpsunicode_t **)DpsMalloc(maxid * sizeof(dpsunicode_t *));
  if (result == NULL) goto fail;

  /* Path 0: nothing consumed yet, empty text. */
  top = 0;
  value[0] = 1.0 * List->total * len;
  position[0] = 0;
  next[0] = -1;
  result[0] = (dpsunicode_t*)DpsUniDup(&space[1]);
  if (result[0] == NULL) goto fail;
  nextid = 1;

  /* Run until the list is empty or consists of a single complete path. */
  while ((top != -1) && (!((position[top] >= len) && (next[top] == -1)))) {

    /* Find the first open path (one that has not reached the end of line). */
    current = top;
    father = top;
    while ((current != -1) && (position[current] >= len)) {
      father = current;
      current = next[current];
    }

    if (current == -1) {
      /* No open path is left: keep only the first path and finish.
         (Handled before unlinking so that next[-1] is never read.) */
      next[top] = -1;
      continue;
    }

    /* Unlink the open path from the working list. */
    if (current == top) {
      top = next[top];
    } else {
      next[father] = next[current];
    }

    h = (unsigned int)(line[position[current]] & 0xffff);

    /* A character without any phrase in the dictionary is tried as a 1-char word. */
    if (List->hash[h] == 0) {
      List->hash[h] = 1;
    }

    /* Longest candidate length, clipped to what is left of the line. */
    i = List->hash[h];
    if (i + position[current] > len) {
      i = len - position[current];
    }

    for (; i > 0; i--) {
      DPS_FREE(otv);
      otv = DpsUniNDup(&line[position[current]], (size_t)i);
      if (otv == NULL) goto fail;
      chinaword = DpsChineseListFind(List, otv);

      if (i == 1 && chinaword == NULL) {
        /* Unknown single character: register it with the lowest frequency.
           otv already holds the 1-character copy. */
        chiw.word = otv;
        chiw.freq = 1;
        DpsChineseListAdd(List, chinaword = &chiw);
      }

      if ((chinaword != NULL) && chinaword->freq) {
        /* Create the extended path in slot `nextid`. */
        p = (double)chinaword->freq / List->total;
        value[nextid] = value[current] / (-1.0 * log(p) / log(10.0));
        position[nextid] = position[current] + i;
        h = DpsUniLen(result[current]) + DpsUniLen(otv) + 2;
        result[nextid] = (dpsunicode_t*)DpsXmalloc((size_t)h * sizeof(dpsunicode_t));
        if (result[nextid] == NULL) goto fail;
        DpsUniStrCpy(result[nextid], result[current]);
        DpsUniStrCat(result[nextid], space);
        DpsUniStrCat(result[nextid], otv);

        /* Look for a listed path that ends at the same position.  The new
           path is dropped when its value does not exceed 85% of the listed
           one, and takes over the head when the listed path is `top`. */
        needinsert = 1;
        iindex = top;
        father = top;
        while (iindex != -1) {
          if (position[iindex] == position[nextid]) {
            if (0.85 * value[iindex] >= value[nextid]) {
              needinsert = 0;
            } else {
              if (top == iindex) {
                next[nextid] = next[iindex];
                top = nextid;
                needinsert = 0;
              }
            }
            iindex = -1;
          } else {
            father = iindex;
            iindex = next[iindex];
          }
        }

        /* Insert the new path into the list.
           NOTE(review): iindex is always -1 here, so the ordering loop below
           never iterates and the path is linked right after `father` with
           nothing following it.  Kept as is to preserve the segmentation
           results; confirm whether a value-ordered insert was intended. */
        if (needinsert == 1) {
          while ((iindex != -1) && (value[iindex] > value[nextid])) {
            father = iindex;
            iindex = next[iindex];
          }
          if (top == iindex) {
            next[nextid] = top;
            top = nextid;
          } else {
            next[father] = nextid;
            next[nextid] = iindex;
          }
        }

        nextid++;
        if (nextid >= maxid) {
          maxid += 128;
          position = (int*)DpsRealloc(position, maxid * sizeof(int));
          next = (int*)DpsRealloc(next, maxid * sizeof(int));
          value = (double*)DpsRealloc(value, maxid * sizeof(double));
          result = (dpsunicode_t **)DpsRealloc(result, maxid * sizeof(dpsunicode_t *));
          if (position == NULL || next == NULL || value == NULL || result == NULL) goto fail;
        }
      }
    }

    DPS_FREE(otv);
  }

  DPS_FREE(position); DPS_FREE(next); DPS_FREE(value);

  if (top == -1) {
    /* Every path was dropped (e.g. only zero-frequency words were found):
       fall back to the unsegmented text instead of reading result[-1]. */
    DpsSegmentFreeResults(result, nextid);
    return (dpsunicode_t*)DpsUniDup(line);
  }

  /* Detach the winning string so the bulk release below leaves it alone. */
  otv = result[top];
  result[top] = NULL;
  DpsSegmentFreeResults(result, nextid);
  return otv;

fail:
  /* Allocation failure: release everything built so far, including the
     candidate strings and the word under examination. */
  DPS_FREE(otv);
  DPS_FREE(position); DPS_FREE(next); DPS_FREE(value);
  DpsSegmentFreeResults(result, nextid);
  return NULL;
}
Exemple #2
0
int DpsSEAMake(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *excerpt,  
	       const char *content_lang, size_t *indexed_size, size_t *indexed_limit, 
	       size_t max_word_len, size_t min_word_len, int crossec, int seasec
#ifdef HAVE_ASPELL
	       , int have_speller, AspellSpeller *speller
#endif
	       ) {
  DPS_SENTENCELIST List;
  DPS_MAPSTAT MapStat;
  DPS_TEXTITEM Item;
  DPS_VAR	*Sec;
  dpsunicode_t *sentence, *lt, savec;
  double *links, *lang_cs, w;
  double delta, pdiv, cur_div;
  size_t l, sent_len, order;
  size_t min_len = 10000000, min_pos = 0;
  int  it;
  register size_t i, j;
#ifdef DEBUG
  char lcstr[4096];

#endif

  TRACE_IN(Indexer, "DpsSEAMake");

  if((Sec = DpsVarListFind(&Doc->Sections, "sea"))) { /* set SEA section to NULL */
    DPS_FREE(Sec->val);
    DPS_FREE(Sec->txt_val);
    Sec->curlen = 0;
  }
  
  bzero(&List, sizeof(List));
  order = 0;
  sentence = DpsUniStrTok_SEA((dpsunicode_t*)excerpt->data, &lt);
  while(sentence) {
    if (lt != NULL) { savec = *lt; *lt = 0; }
#ifdef DEBUG
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)sentence, sizeof(dpsunicode_t) * (DpsUniLen(sentence) + 1));
    fprintf(stderr, "Sentence.%d: %s\n", List.nitems, lcstr);
#endif
    if ((sent_len = DpsUniLen(sentence)) >= Indexer->Flags.SEASentenceMinLength) {
      j = 1;
      for (i = 0; i < List.nitems; i++) {
	if (DpsUniStrCmp(sentence, List.Sent[i].sentence) == 0) {
	  j = 0; break;
	}
      }
      if (j) {
	if ( List.nitems < Indexer->Flags.SEASentences ) {
	  if (List.nitems == List.mitems) {
	    List.mitems += 16;
	    List.Sent = (DPS_SENTENCE*)DpsRealloc(List.Sent, List.mitems * sizeof(DPS_SENTENCE));
	    if (List.Sent == NULL) { TRACE_OUT(Indexer); return DPS_ERROR;}
	  }
	  List.Sent[List.nitems].sentence = DpsUniDup(sentence);
	  List.Sent[List.nitems].len = sent_len;
	  List.Sent[List.nitems].order = order++;
	  sentence = DpsUniDup(sentence);
	  DpsUniStrToLower(sentence);
	  bzero(&List.Sent[List.nitems].LangMap, sizeof(DPS_LANGMAP));
	  DpsBuildLangMap(&List.Sent[List.nitems].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0);
	  if (sent_len < min_len) { min_len = sent_len; min_pos = List.nitems; }
	  List.nitems++;
	  DPS_FREE(sentence);
	} else if (sent_len > min_len) {
	  DPS_FREE(List.Sent[min_pos].sentence);
	  List.Sent[min_pos].sentence = DpsUniDup(sentence);
	  List.Sent[min_pos].len = sent_len;
	  List.Sent[min_pos].order = order++;
	  sentence = DpsUniDup(sentence);
	  DpsUniStrToLower(sentence);
	  bzero(&List.Sent[min_pos].LangMap, sizeof(DPS_LANGMAP));
	  DpsBuildLangMap(&List.Sent[min_pos].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0);
	  DPS_FREE(sentence);
	  min_len = List.Sent[0].len; min_pos = 0;
	  for(i = 1; i < List.nitems; i++) if (List.Sent[i].len < min_len) { min_len = List.Sent[i].len; min_pos = i; }
	}
      }
    }
#ifdef DEBUG
    fprintf(stderr, "Sent. len.:%d, Min.allowed: %d\n", sent_len, Indexer->Flags.SEASentenceMinLength);
#endif
    if (lt != NULL) *lt = savec;
    sentence = DpsUniStrTok_SEA(NULL, &lt);
  }
  DpsLog(Indexer, DPS_LOG_DEBUG, "SEA sentences: %d", List.nitems);
  if (List.nitems < 4) {
    for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence);
    DPS_FREE(List.Sent); 
    TRACE_OUT(Indexer);
    return DPS_OK; 
  }

  links = (double*)DpsMalloc(sizeof(double) * List.nitems * List.nitems);
  lang_cs = (double*)DpsMalloc(sizeof(double) * List.nitems);
/*
        k                 ot
  links[i * List.nitems + j] 
*/

  if (links != NULL && lang_cs != NULL) {

    for (i = 0; i < List.nitems; i++) {
      DpsPrepareLangMap(&List.Sent[i].LangMap);
    }

    for (i = 0; i < List.nitems; i++) {
      List.Sent[i].Oi =  List.Sent[i].di = 0.5;
      if (Doc->lang_cs_map == NULL) {
	  links[i * List.nitems + i] = 0.0;
      } else {
	MapStat.map = &List.Sent[i].LangMap;
	DpsCheckLangMap6(Doc->lang_cs_map, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT);
	links[i * List.nitems + i] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1);
      }
#ifdef DEBUG
      DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, i, links[i * List.nitems + i], MapStat.hits, MapStat.miss);
#endif
      for (j = 0; j < List.nitems; j++) {
	  if (j == i) continue;
	MapStat.map = &List.Sent[j].LangMap;
	DpsCheckLangMap6(&List.Sent[j].LangMap, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT);

	links[i * List.nitems + j] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1);
#ifdef DEBUG
	DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, j, links[i * List.nitems + j], MapStat.hits, MapStat.miss);
#endif
      }
    }

    for (l = 0; l < List.nitems; l++) {
	w = 0.0;
	for (i = 0; i < List.nitems; i++) { 
	    w += links[l * List.nitems + i] * List.Sent[i].Oi;
	}
	w = f(w);
	if (w < LOW_BORDER_EPS2) w = LOW_BORDER_EPS2;
	else if (w > HI_BORDER_EPS2) w = HI_BORDER_EPS2;
	List.Sent[l].di = w;
    }

    DpsSort(List.Sent, List.nitems, sizeof(DPS_SENTENCE), (qsort_cmp)SentCmp);

#ifdef DEBUG
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[0].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[0].sentence) + 1));
    fprintf(stderr, "Sent.0: %f %f -- %s\n", List.Sent[0].di, List.Sent[0].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[1].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[1].sentence) + 1));
    fprintf(stderr, "Sent.1: %f %f -- %s\n", List.Sent[1].di, List.Sent[1].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[2].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[2].sentence) + 1));
    fprintf(stderr, "Sent.2: %f %f -- %s\n", List.Sent[2].di, List.Sent[2].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[3].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[3].sentence) + 1));
    fprintf(stderr, "Sent.3: %f %f -- %s\n", List.Sent[3].di, List.Sent[3].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[4].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[4].sentence) + 1));
    fprintf(stderr, "Sent.4: %f %f -- %s\n", List.Sent[4].di, List.Sent[4].Oi, lcstr);
#endif
    DpsSort(List.Sent, TOP_SENTENCES, sizeof(DPS_SENTENCE), (qsort_cmp)SentOrderCmp);

    bzero(&Item, sizeof(Item));
    Item.section = seasec;
    Item.href = NULL;
    Item.section_name = "sea";
    for (i = 0; i < TOP_SENTENCES; i++) {
      dpsunicode_t *UStr = DpsUniDup(List.Sent[i].sentence);
      DpsPrepareItem(Indexer, Doc, &Item, List.Sent[i].sentence, UStr, content_lang, indexed_size, indexed_limit,
		     max_word_len, min_word_len, crossec
#ifdef HAVE_ASPELL
		     , have_speller, speller, NULL
#endif
		     );
      DPS_FREE(UStr);
    }
  }
  DPS_FREE(lang_cs);
  DPS_FREE(links);
  for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence);
  DPS_FREE(List.Sent);

  TRACE_OUT(Indexer);
  return DPS_OK;
}
Exemple #3
0
/*
 * Append an operator item carrying no word to the stack of `Res`.
 * The caller must have made sure there is room for it.
 */
static void DpsStackPushOperator(DPS_RESULT *Res, const DPS_PREPARE_STATE *state, int cmd) {
  Res->items[Res->nitems].cmd = cmd;
  Res->items[Res->nitems].order = 0;
  Res->items[Res->nitems].origin = 0;
  Res->items[Res->nitems].count = 0;
  Res->items[Res->nitems].len = 0;
  Res->items[Res->nitems].crcword = 0;
  Res->items[Res->nitems].word = NULL;
  Res->items[Res->nitems].ulen = 0;
  Res->items[Res->nitems].uword = NULL;
  Res->items[Res->nitems].pbegin = NULL;
  Res->items[Res->nitems].order_origin = 0;
  Res->items[Res->nitems].secno = state->secno[state->p_secno];
  Res->nitems++;
  Res->ncmds++;
}

/*
 * Append the item described by `state` (command, order, origin, section) and
 * by `word` / `uword` (either may be NULL) to the query stack Res->items.
 *
 * Behaviour:
 *   - the origin gets DPS_WORD_ORIGIN_STOP when `uword` is a stop word or its
 *     length is outside query->WordParam.{min,max}_word_len;
 *   - a DPS_STACK_WORD item whose origin lacks DPS_WORD_ORIGIN_QUERY is not
 *     added when an item with the same order and word hash already exists;
 *   - an AND/OR/NEAR/ANYWORD item directly following another such operator
 *     is not added;
 *   - a word following a word or a closing bracket gets an implicit OR in
 *     front of it; an opening bracket following a closing one gets
 *     state->add_cmd in front of it;
 *   - `word` and `uword` are duplicated; the stack owns the copies.
 *
 * Returns DPS_OK, or DPS_ERROR when the stack cannot be enlarged (the stack is
 * then left empty).
 */
int DpsAddStackItem(DPS_AGENT *query, DPS_RESULT *Res, DPS_PREPARE_STATE *state, char *word, dpsunicode_t *uword) {
  int origin;
  size_t      i; 
  size_t wlen = (uword == NULL) ? 0 : DpsUniLen(uword);
  dpshash32_t crcword = (word == NULL) ? 0 : DpsStrHash32(word);

#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "0[%d].%x %c -- %s [%x] .secno:%d\n", state->order, state->origin, item_type(state->cmd), 
	 (word == NULL) ? "<NULL>" : word, crcword, state->secno[state->p_secno]);
#endif

  if((uword != NULL) && ( DpsStopListFind(&query->Conf->StopWords, uword, state->qlang) ||
			  (query->WordParam.min_word_len > wlen) ||
			  (query->WordParam.max_word_len < wlen)) ) {

    origin = state->origin | DPS_WORD_ORIGIN_STOP;
  } else {
    origin = state->origin;
  }

  /* Words that do not come from the query itself are added only once per order. */
  if (state->cmd == DPS_STACK_WORD && !(origin & DPS_WORD_ORIGIN_QUERY)) {
    for (i = 0; i < Res->nitems; i++) {
      if ((Res->items[i].order == state->order) && (Res->items[i].crcword == crcword)) return DPS_OK;
    }
  }

  /* Up to two items are appended per call.  Written as an addition so that an
     unsigned capacity below 2 (e.g. a fresh, empty stack) cannot wrap around. */
  if (Res->nitems + 2 >= Res->mitems) {
    Res->mitems += DPS_MAXSTACK;
    Res->items = (DPS_STACK_ITEM*)DpsRealloc(Res->items, Res->mitems * sizeof(DPS_STACK_ITEM));
    if (Res->items == NULL) {
      DpsLog(query, DPS_LOG_ERROR, "Can't alloc %d bytes for %d mitems", (int)(Res->mitems * sizeof(DPS_STACK_ITEM)), (int)Res->mitems);
      /* Keep the counters consistent with the missing array. */
      Res->nitems = Res->mitems = 0;
      return DPS_ERROR;
    }
  }
      
  if (Res->nitems > 0) {
    /* Collapse two binary operators in a row: the first one wins. */
    if (state->cmd == DPS_STACK_OR || state->cmd == DPS_STACK_AND || state->cmd == DPS_STACK_NEAR || state->cmd == DPS_STACK_ANYWORD) {
      if (Res->items[Res->nitems-1].cmd == DPS_STACK_AND || Res->items[Res->nitems-1].cmd == DPS_STACK_OR 
	  || Res->items[Res->nitems-1].cmd == DPS_STACK_NEAR ||  Res->items[Res->nitems-1].cmd == DPS_STACK_ANYWORD) {
	return DPS_OK;
      }
    }

    /* word after a word / closing bracket: insert an implicit OR */
    if ((state->cmd == DPS_STACK_WORD) 
	&& (
	    (Res->items[Res->nitems-1].cmd == DPS_STACK_WORD)
	    || (Res->items[Res->nitems-1].cmd == DPS_STACK_RIGHT)
	    || (Res->items[Res->nitems-1].cmd == DPS_STACK_PHRASE_RIGHT)
	    )) {
      DpsStackPushOperator(Res, state, DPS_STACK_OR);
#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d].%x %c -- %s", 0, 0, item_type(DPS_STACK_OR), "<NULL>");
#endif
    }
    /* opening bracket after a closing one: insert the configured operator */
    if ((state->cmd == DPS_STACK_LEFT) 
	&& (
	    (Res->items[Res->nitems-1].cmd == DPS_STACK_RIGHT)
	    || (Res->items[Res->nitems-1].cmd == DPS_STACK_PHRASE_RIGHT)
	    )) {
      DpsStackPushOperator(Res, state, state->add_cmd);
#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d].%x %c -- %s", 0, 0, item_type(state->add_cmd), "<NULL>");
#endif
    }
  }

  Res->items[Res->nitems].cmd = state->cmd;
  Res->items[Res->nitems].order = state->order;
  Res->items[Res->nitems].order_inquery = state->order_inquery;
  Res->items[Res->nitems].origin = origin;
  Res->items[Res->nitems].count = 0;
  Res->items[Res->nitems].len = (word == NULL) ? 0 : dps_strlen(word);
  Res->items[Res->nitems].crcword = crcword;
  Res->items[Res->nitems].word = (word == NULL) ? NULL : DpsStrdup(word);
  Res->items[Res->nitems].ulen = wlen;
  Res->items[Res->nitems].uword = (uword == NULL) ? NULL : DpsUniDup(uword);
  Res->items[Res->nitems].pbegin = NULL;
  Res->items[Res->nitems].order_origin = 0;
  Res->items[Res->nitems].wordnum = Res->nitems;
  Res->items[Res->nitems].secno = state->secno[state->p_secno];
  Res->nitems++;
  if (state->cmd != DPS_STACK_WORD) {
    Res->ncmds++;
  } else {
    /* The item at index `order` accumulates the origins of all forms of that word. */
    Res->items[state->order].order_origin |= origin;
    if (state->order > Res->max_order) Res->max_order = state->order;
    /* Track the largest in-query position (the previous code stored `order` here). */
    if (state->order_inquery > Res->max_order_inquery) Res->max_order_inquery = state->order_inquery;
  }
#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d,%d].%x %c -- %s", state->order, state->order_inquery, state->origin, item_type(state->cmd), 
	 (word == NULL) ? "<NULL>" : word);
#endif

  return DPS_OK;
}
Exemple #4
0
/*
 * Load a synonyms file into Env->Synonyms.
 *
 * The whole file is read into memory and processed line by line.  Lines
 * starting with '#', space, tab, CR or LF are skipped.  Recognised commands:
 *   Charset: <name>     character set of the file (required before any words)
 *   Language: <code>    language of the list (required before any words)
 *   Thesaurus: yes|...  when "yes", every word line gets its own non-zero key
 *                       stored in the `count` field of its synonym pairs;
 *                       otherwise 0 is stored
 * Any other line is split into words with DpsGetArgs; each word is converted
 * to the internal charset, lower-cased and NFC-normalised, and every pair of
 * words on the line is added to Env->Synonyms in both directions.
 *
 * Returns DPS_OK on success and DPS_ERROR on failure.  Most failures leave a
 * message in Env->errstr; a failing stat() is reported on stderr.
 * Synonyms added before a failure stay in Env->Synonyms.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
     struct stat     sb;
     char      *str, *data = NULL, *cur_n = NULL;
     char      lang[64]="";
     DPS_CHARSET    *cs=NULL;
     DPS_CHARSET    *sys_int=DpsGetCharSet("sys-int");
     DPS_CONV  file_uni;
     DPS_WIDEWORD    *ww = NULL;
     size_t key = 1;
     size_t nlive = 0;   /* number of leading ww[] entries that own a uword */
     size_t k;
     int flag_th = 0;
     int             fd;
     int             rc = DPS_OK;
     char            savebyte = '\0';
     
     if (stat(filename, &sb)) {
       fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     /* From here on every exit goes through `done`, which releases fd, data and ww. */
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %ld bytes", (long)sb.st_size);
       rc = DPS_ERROR;
       goto done;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
       rc = DPS_ERROR;
       goto done;
     }
     data[sb.st_size] = '\0';

     /* Isolate the first line: the byte following its '\n' is saved in
        `savebyte` and replaced by a terminator until the line is processed. */
     str = data;
     cur_n = strchr(str, '\n');
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }

     while(str != NULL) {
          if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue;
          
          if(!strncasecmp(str,"Charset:",8)){
               char * lasttok;
               char * charset;
               if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
                    cs=DpsGetCharSet(charset);
                    if(!cs){
                         dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                                   charset, filename);
                         rc = DPS_ERROR;
                         goto done;
                    }
                    DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
               }
          }else
          if(!strncasecmp(str,"Language:",9)){
               char * lasttok;
               char * l;
               if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
                    dps_strncpy(lang, l, sizeof(lang)-1);
               }
          }else
          if(!strncasecmp(str, "Thesaurus:", 10)) {
               char * lasttok;
               char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
               /* A command without a value is ignored, like Charset: and Language: */
               if (tok != NULL) {
                    flag_th = (strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
               }
          }else{
               char      *av[255];
               size_t         ac, i, j;
               dpsunicode_t *t;

               if(!cs){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
                    rc = DPS_ERROR;
                    goto done;
               }
               if(!lang[0]){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
                    rc = DPS_ERROR;
                    goto done;
               }

               ac = DpsGetArgs(str, av, 255);
               if (ac < 2) goto loop_continue;

               if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) {
                    rc = DPS_ERROR;
                    goto done;
               }

               /* Convert every word of the line: charset -> lower case -> NFC. */
               for (i = 0; i < ac; i++) {
                    ww[i].word = av[i];
                    ww[i].len = dps_strlen(av[i]);
                    t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
                    if (t == NULL) {
                         rc = DPS_ERROR;
                         goto done;
                    }
                    DpsConv(&file_uni, (char*)t, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
                    DpsUniStrToLower(t);
                    ww[i].uword = DpsUniNormalizeNFC(NULL, t);
                    DPS_FREE(t);
                    if (ww[i].uword == NULL) {
                         rc = DPS_ERROR;
                         goto done;
                    }
                    nlive = i + 1;
               }

               /* Add every unordered pair of words in both directions. */
               for (i = 0; i < ac - 1; i++) {
                    for (j = i + 1; j < ac; j++) {

                         /* two entries are appended below, so keep two slots free */
                         if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
                              Env->Synonyms.msynonyms += 64;
                              Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, 
                                                             sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
                              if (Env->Synonyms.Synonym == NULL) {
                                   Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
                                   rc = DPS_ERROR;
                                   goto done;
                              }
                         }
               
                         bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                         /* Add direct order */
                         Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword);
                         Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword);
                         Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
                           Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                         Env->Synonyms.nsynonyms++;
               
                         bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                         /* Add reverse order */
                         Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword);
                         Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword);
                         Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
                           Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                         Env->Synonyms.nsynonyms++;
                    }
               }

               for (i = 0; i < ac; i++) {
                    DPS_FREE(ww[i].uword);
               }
               nlive = 0;
               /* next line gets the next key; 0 is skipped on wrap-around */
               do { key++; } while (key == 0);
          }
     loop_continue:
          /* Step to the next line: restore the byte hidden by the terminator
             and cut the following line off in the same way. */
          str = cur_n;
          if (str != NULL) {
               *str = savebyte;
               cur_n = strchr(str, '\n');
               if (cur_n != NULL) {
                    cur_n++;
                    savebyte = *cur_n;
                    *cur_n = '\0';
               }
          }
     }

done:
     /* Release the words of a line that was interrupted by an error. */
     if (ww != NULL) {
          for (k = 0; k < nlive; k++) {
               DPS_FREE(ww[k].uword);
          }
     }
     DPS_FREE(ww);
     DPS_FREE(data);
     DpsClose(fd);
     return rc;
}