C++ (Cpp) DpsUniLen Exemples

Exemple #1

0

Afficher le fichier

Fichier : sgml.c Projet : invokerj/dpsearch-4.53

/**
 * Replace SGML character references in a NUL-terminated dpsunicode_t string
 * with the characters they stand for, editing the string in place.
 *
 * Two forms are recognised:
 *   - decimal numeric references, "&#<digits>;"
 *   - named entities made of ASCII letters, "&<name>;", looked up with
 *     DpsSgmlToUni()
 * A recognised reference is overwritten by one character and the remainder of
 * the string (terminator included) is shifted left over it. Anything that is
 * not recognised is left untouched.
 *
 * @param ustr  string to rewrite; it can only become shorter.
 */
void DpsSGMLUniUnescape(dpsunicode_t *ustr) {
  dpsunicode_t *s, *e, c;
  char sgml[DPS_MAX_SGML_LEN + 1];

  for (s = ustr; *s; s++) {
    int i = 0;

    if (*s != '&') continue;

    if (*(s + 1) == '#') {
      /* Numeric reference: scan the run of decimal digits after "&#". */
      for (e = s + 2; (e - s < DPS_MAX_SGML_LEN) && (*e <= '9') && (*e >= '0'); e++);
      if (*e != ';') continue;

      for (i = 2; s + i < e; i++)
        sgml[i - 2] = (char)s[i];
      sgml[i - 2] = '\0';

      c = (dpsunicode_t)atoi(sgml);
      /* "&#;" (no digits) and "&#0;" both yield 0. Storing that would plant a
         terminator in the middle of the string, so leave the text as it is. */
      if (c == 0) continue;

      *s = c;
      dps_memmove(s + 1, e + 1, sizeof(dpsunicode_t) * (DpsUniLen(e + 1) + 1));
    } else {
      /* Named entity: collect the ASCII letters that follow '&'. */
      for (e = s + 1;
           (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A')));
           e++) {
        sgml[i] = (char)*e;
        i++;
      }
      sgml[i] = '\0';

      /* Only substitute when the name is terminated by ';' and is known. */
      if ((*e == ';') && (c = DpsSgmlToUni(sgml))) {
        *s = c;
        dps_memmove(s + 1, e + 1, sizeof(dpsunicode_t) * (DpsUniLen(e + 1) + 1));
      }
    }
  }
}

Exemple #2

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* Backward unicode string compare: the strings are walked from their last
   character towards the first. Returns -1 or 1 at the first differing pair;
   if one string runs out first, the shorter one sorts lower; 0 if equal. */
int DpsUniStrBCmp(const dpsunicode_t *s1, const dpsunicode_t *s2) {
  ssize_t i1 = DpsUniLen(s1) - 1;
  ssize_t i2 = DpsUniLen(s2) - 1;

  for (; i1 >= 0 && i2 >= 0; i1--, i2--) {
    if (s1[i1] != s2[i2]) {
      return (s1[i1] < s2[i2]) ? -1 : 1;
    }
  }

  /* Whatever index is still non-negative belongs to the longer string. */
  if (i1 != i2) {
    return (i1 < i2) ? -1 : 1;
  }
  return 0;
}

Exemple #3

0

Afficher le fichier

Fichier : chinese.c Projet : invokerj/dpsearch-4.53

/* Append one word record to the list.
   The word array grows in steps of 1024 entries and the 65536-entry hash
   table is created on first use. hash[] is indexed by the low 16 bits of a
   word's first character and keeps the greatest length seen among the words
   starting with that character. If either allocation fails, the word counters
   are reset to zero and nothing is added. The word pointer is stored as is,
   not duplicated. */
static void DpsChineseListAddBundle(DPS_CHINALIST *List, DPS_CHINAWORD * chinaword){
     DPS_CHINAWORD *slot;
     unsigned int bucket;
     size_t wlen;

     /* Make room for one more entry. */
     if (List->nwords + 1 > List->mwords) {
       List->mwords += 1024;
       List->ChiWord = (DPS_CHINAWORD *)DpsRealloc(List->ChiWord, (List->mwords)*sizeof(DPS_CHINAWORD));
       if (List->ChiWord == NULL) {
         List->mwords = List->nwords = 0;
         return;
       }
     }

     /* Lazily create the first-character table. */
     if (List->hash == NULL) {
       List->hash = (size_t *)DpsXmalloc(65536 * sizeof(size_t));
       if (List->hash == NULL) {
         List->mwords = List->nwords = 0;
         return;
       }
     }

     slot = &List->ChiWord[List->nwords];
     slot->word = chinaword->word;
     slot->freq = chinaword->freq;
     List->total += chinaword->freq;

     bucket = (unsigned int)(slot->word[0] & 0xffff);
     wlen = DpsUniLen(slot->word);
     if (List->hash[bucket] < wlen) {
       List->hash[bucket] = wlen;
     }

     List->nwords++;
}

Exemple #4

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

dpsunicode_t *DpsUniGermanReplace(const dpsunicode_t *str) {
  size_t l = DpsUniLen(str);
  dpsunicode_t *german = DpsMalloc((2 * l + 1) * sizeof(dpsunicode_t));
  if (german !=NULL) {
    dpsunicode_t *s = str, *d = german;
    while(*s) {
      switch(*s) {
      case 0x00DF: /* eszett, or scharfes s, small */
	*d++ = 's'; *d++ = 's'; break;
      case 0x1E9E: /* eszett, or scharfes s, big */
	*d++ = 'S'; *d++ = 'S'; break;
      case 0x00D6: *d++ = 'O'; *d++ = 'E'; break;
      case 0x00F6: *d++ = 'o'; *d++ = 'e'; break;

      case 0x00DC: *d++ = 'U'; *d++ = 'E'; break;
      case 0x00FC: *d++ = 'u'; *d++ = 'e'; break;

      case 0x00C4: *d++ = 'A'; *d++ = 'E'; break;
      case 0x00E4: *d++ = 'a'; *d++ = 'e'; break;

      default: *d++ = *s;
      }
      s++;
    }
    *d = 0;
  }
  return german;
}

Exemple #5

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* Reversed string copy: write the characters of `src` into `dst` in reverse
   order and NUL-terminate the result. `dst` must have room for
   DpsUniLen(src) + 1 elements. Returns `dst`. */
dpsunicode_t *DpsUniStrRCpy(dpsunicode_t *dst, const dpsunicode_t *src) {
  size_t len = DpsUniLen(src);
  size_t k;

  for (k = 0; k < len; k++) {
    dst[k] = src[len - 1 - k];
  }
  dst[len] = 0;
  return dst;
}

Exemple #6

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* Backward comparison of at most `count` trailing characters.
   Returns -1 or 1 at the first differing pair (walking from the end), 0 once
   `count` pairs have matched. If a string is exhausted before that, the
   shorter one sorts lower. */
int DpsUniStrBNCmp(const dpsunicode_t *s1, const dpsunicode_t *s2, size_t count) {
  ssize_t i1 = DpsUniLen(s1) - 1;
  ssize_t i2 = DpsUniLen(s2) - 1;
  ssize_t left = count;

  for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--) {
    if (s1[i1] != s2[i2]) {
      return (s1[i1] < s2[i2]) ? -1 : 1;
    }
  }

  if (left == 0) return 0;
  if (i1 != i2) {
    return (i1 < i2) ? -1 : 1;
  }
  /* Last resort: compare the leading characters. */
  if (*s1 != *s2) {
    return (*s1 < *s2) ? -1 : 1;
  }
  return 0;
}

Exemple #7

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* Duplicate at most `len` leading characters of `s` into a freshly allocated,
   NUL-terminated buffer. Returns NULL if the allocation fails. */
dpsunicode_t *DpsUniNDup(const dpsunicode_t *s, size_t len) {
	size_t n = DpsUniLen(s);
	dpsunicode_t *copy;

	if (len < n) n = len;

	copy = (dpsunicode_t*)DpsMalloc((n + 1) * sizeof(*s));
	if (copy == NULL) return(NULL);

	dps_memmove(copy, s, n * sizeof(*s));
	copy[n] = 0;
	return copy;
}

Exemple #8

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* Duplicate a NUL-terminated unicode string, terminator included, into a
   freshly allocated buffer. Returns NULL if the allocation fails. */
dpsunicode_t *DpsUniDup(const dpsunicode_t *s) {
	size_t bytes = (DpsUniLen(s) + 1) * sizeof(*s);
	dpsunicode_t *copy = (dpsunicode_t*)DpsMalloc(bytes);

	if (copy != NULL) {
		/* Fresh allocation, so source and destination cannot overlap. */
		dps_memcpy(copy, s, bytes);
	}
	return copy;
}

Exemple #9

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* String copy: copies `src`, terminator included, into `dst` in one block move.
   `dst` must have room for DpsUniLen(src) + 1 elements. Returns whatever
   dps_memmove() returns for `dst`. */
dpsunicode_t *DpsUniStrCpy(dpsunicode_t *dst, const dpsunicode_t *src) {
  size_t units = DpsUniLen(src) + 1;   /* characters plus the terminator */
  size_t bytes = units * sizeof(dpsunicode_t);

  return dps_memmove(dst, src, bytes);
}

Exemple #10

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* Bounded string copy: moves min(DpsUniLen(src) + 1, len) elements from `src`
   to `dst`. The terminator is copied only when it falls within the first `len`
   elements; the rest of `dst` is not padded. Returns whatever dps_memmove()
   returns for `dst`. */
dpsunicode_t *DpsUniStrNCpy(dpsunicode_t *dst, const dpsunicode_t *src, size_t len) {
  size_t units = DpsUniLen(src) + 1;

  if (units > len) {
    units = len;
  }
  return dps_memmove(dst, src, sizeof(dpsunicode_t) * units);
}

Exemple #11

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

dpsunicode_t *DpsUniRDup(const dpsunicode_t *s) {
	dpsunicode_t *res;
	size_t size, len;
	
	size = ((len = DpsUniLen(s)) + 1) * sizeof(*s);
	if((res=(dpsunicode_t*)DpsMalloc(size)) == NULL)
		return(NULL);
	{
	  register size_t z;
	  size = len - 1;
	  for (z = 0; z < len; z++) res[z] = s[size - z];
	  res[len] = 0;
	}
	return res;
}

Exemple #12

0

Afficher le fichier

Fichier : chinese.c Projet : invokerj/dpsearch-4.53

/* Append `piece` to the growing buffer *out, preceded by one space when the
   buffer is not empty. *cap is the buffer capacity in elements and is raised
   when the piece might not fit.
   Returns 1 on success, 0 if the reallocation failed (*out is then NULL).
   NOTE(review): presumably DpsRealloc() releases the old block on failure,
   otherwise the previous buffer is lost here - confirm. */
static int DpsSegmentAppend(dpsunicode_t **out, size_t *cap, dpsunicode_t *piece) {
  dpsunicode_t space[] = { 32, 0 };
  size_t need = 2 * (DpsUniLen(piece) + 1);
  size_t used = DpsUniLen(*out);

  if (used + need >= *cap) {
    *cap = used + need + 1;
    *out = (dpsunicode_t*)DpsRealloc(*out, *cap * sizeof(dpsunicode_t));
    if (*out == NULL) return 0;
  }
  if (**out) DpsUniStrCat(*out, space);
  DpsUniStrCat(*out, piece);
  return 1;
}

/**
 * Split `line` into tokens with DpsUniGetSepToken() and rebuild it as a
 * space-separated string. Tokens whose first character has a DpsUniCType()
 * class of 1, 2 or above DPS_UNI_BUKVA are copied as they are; every other
 * token is first segmented with DpsSegmentProcess().
 *
 * @param List  word list handed to DpsSegmentProcess().
 * @param line  NUL-terminated input; it is not modified (tokenising is done
 *              on a private copy).
 * @return      newly allocated NUL-terminated string, or NULL if an
 *              allocation or the segmentation of a token failed.
 */
dpsunicode_t *DpsSegmentByFreq(DPS_CHINALIST *List, dpsunicode_t *line) {
  dpsunicode_t *out, *mid, *last, *sentence, *segmented_sentence, part;
  size_t i, l, len;
  int ctype, have_bukva_forte, fb_type;

  len = DpsUniLen(line);          /* measured once, not on every iteration */
  l = 2 * (len + 1);
  if (l < 2) return NULL;

  out = (dpsunicode_t*)DpsMalloc(l * sizeof(dpsunicode_t));
  if (out == NULL) return NULL;
  *out = '\0';

  mid = (dpsunicode_t*)DpsXmalloc(l * sizeof(dpsunicode_t));
  if (mid == NULL) { DPS_FREE(out); return NULL; }

  /* Working copy for the tokenizer, terminated explicitly. */
  for (i = 0; i < len; i++) {
    mid[i] = line[i];
  }
  mid[len] = 0;

  for (sentence = DpsUniGetSepToken(mid, &last, &ctype, &have_bukva_forte, 0);
       sentence;
       sentence = DpsUniGetSepToken(NULL, &last, &ctype, &have_bukva_forte, 0)) {
    /* Temporarily terminate the token so it can be used as a string. */
    part = *last;
    *last = 0;
    fb_type = DpsUniCType(*sentence);

    if (fb_type > DPS_UNI_BUKVA || fb_type == 2 || fb_type == 1) {
      if (!DpsSegmentAppend(&out, &l, sentence)) {
        DPS_FREE(mid);
        return NULL;
      }
    } else {
      segmented_sentence = DpsSegmentProcess(List, sentence);
      if (segmented_sentence == NULL) {
        /* Release the partial output as well; it used to leak here. */
        DPS_FREE(out);
        DPS_FREE(mid);
        return NULL;
      }
      if (!DpsSegmentAppend(&out, &l, segmented_sentence)) {
        DPS_FREE(segmented_sentence);
        DPS_FREE(mid);
        return NULL;
      }
      DPS_FREE(segmented_sentence);
    }
    *last = part;
  }

  DPS_FREE(mid);

  return out;
}

Exemple #13

0

Afficher le fichier

Fichier : chinese.c Projet : invokerj/dpsearch-4.53

/* Segment `line` into dictionary words and return the result as a newly
   allocated string in which every word is preceded by one space.

   When List->hash is NULL no segmentation is attempted and a plain copy of
   `line` is returned.

   Otherwise candidate segmentations ("paths") are kept in four parallel
   arrays indexed by path id:
     position[id]  how many characters of `line` the path has consumed
     value[id]     score of the path
     result[id]    text built so far
     next[id]      id of the following path in a singly linked list, -1 at
                   the end; `top` is the head of that list
   Each round takes the first path that has not reached the end of the line
   and extends it with every candidate word starting at its position, from
   the longest length recorded in List->hash[] down to one character. The
   loop stops when the head path is complete and has no successor; the text
   of the head path is returned and all other texts are freed.

   Returns NULL when an allocation fails. */
static dpsunicode_t *DpsSegmentProcess(DPS_CHINALIST *List, dpsunicode_t *line) {
  int top, nextid, *position, *next, len, maxid, i, current, father, needinsert, iindex;
  unsigned int h;
  double *value, p;
  dpsunicode_t **result;
  dpsunicode_t *otv, space[] = {32, 0};
  DPS_CHINAWORD *chinaword, chiw;

  if (/*(line[0] >= 0x80) &&*/ (List->hash != NULL)) {

    len = DpsUniLen(line);
    maxid = 2 * len + 1;
    position = (int*)DpsMalloc(maxid * sizeof(int));
    if (position == NULL) return NULL;
    next = (int*)DpsMalloc(maxid * sizeof(int));
    if (next == NULL) {
      DPS_FREE(position);
      return NULL;
    }
    value = (double*)DpsMalloc(maxid * sizeof(double));
    if (value == NULL) {
      DPS_FREE(position); DPS_FREE(next);
      return NULL;
    }
    result = (dpsunicode_t **)DpsMalloc(maxid * sizeof(dpsunicode_t *));
    if (result == NULL) {
      DPS_FREE(position); DPS_FREE(next); DPS_FREE(value);
      return NULL;
    }
    
    /* Seed the list with path 0: nothing consumed, empty text. */
    top = 0;
/*    value[0] = 1;*/
    value[0] = 1.0 * List->total * len; 
    position[0] = 0;
    next[0] = -1;
    /* &space[1] points at the terminator, i.e. an empty string */
    result[0] = (dpsunicode_t*)DpsUniDup(&space[1]);
    nextid = 1;

/*    fprintf(stderr, "SegmentProcess start: len -- %d\n", len);*/

    while ((top != -1) && (!((position[top] >= len) && (next[top] == -1)))) {

/*      fprintf(stderr, "top: %d  position: %d (len: %d)  next:%d\n", top, position[top], len, next[top]);*/


/*   # find the first open path */
      current = top;
      father = top;
      while ((current != -1) && (position[current] >= len)) {
	father = current;
	current = next[current];
      }
/*   # remove this path */
      if (current == top) {
	top = next[top];
      } else {
	/* NOTE(review): when every listed path is already complete, current is
	   -1 here and next[current] reads next[-1] - confirm and guard. */
	next[father] = next[current];
      }

      if (current == -1) {
/*       # no open path, finished, take the first path */
	next[top] = -1;
      } else {
	otv = &line[position[current]];
	h = (unsigned int)(otv[0] & 0xffff);

/*       # if the first character doesn't have word phrase in the dict.*/
	if (List->hash[h] == 0) {
	  List->hash[h] = 1 /*2*/;
	}

	/* longest candidate length, clamped to what is left of the line */
	i = List->hash[h];
	if (i + position[current] > len) {
	  i = len - position[current];
	}
	/*i = i + 1*/ /*2*/;
	otv = NULL;
	for (; i > 0; i-- /*2*/) {
	  /*i = i - 1*/ /*2*/;
	  DPS_FREE(otv);
	  otv = DpsUniNDup(&line[position[current]], (size_t)i);
	  chinaword = DpsChineseListFind(List, otv);

	  /* An unknown single character is added to the list with frequency 1
	     so that the path can always advance. chiw lives on the stack while
	     its word pointer (otv) is freed further down.
	     NOTE(review): presumably DpsChineseListAdd() keeps its own copy of
	     the word - confirm. */
	  if (i == 1 /*2*/ && chinaword == NULL) {
	    DPS_FREE(otv);
	    otv = DpsUniNDup(&line[position[current]], 1/*2*/);
	    chiw.word = otv;
	    chiw.freq = 1;
	    DpsChineseListAdd(List, chinaword = &chiw);
/*	    DpsChineseListSort(List);*/
	    /*i = 1*//*2*//*;*/
	  }

	  if ((chinaword != NULL) && chinaword->freq) {
/*       # pronode()   */
/*	  value[nextid] = value[current] * chinaword->freq / List->total;*/
	    /* score: parent score divided by -log10(relative frequency) */
	    p = (double)chinaword->freq / List->total;
	    value[nextid] = value[current] / (-1.0 * log(p) / log(10.0));
	    position[nextid] = position[current] + i;
	    /* room for parent text + space + word + terminator */
	    h = DpsUniLen(result[current]) + DpsUniLen(otv) + 2;
	    result[nextid] = (dpsunicode_t*)DpsXmalloc((size_t)h * sizeof(dpsunicode_t));
	    if (result[nextid] == NULL) {
	      /* NOTE(review): the strings in result[0..nextid-1] and otv are not
	         released on this path - looks like a leak, confirm. */
	      DPS_FREE(position); DPS_FREE(next); DPS_FREE(value); DPS_FREE(result);
	      return NULL;
	    }
	    DpsUniStrCpy(result[nextid], result[current]);
	    DpsUniStrCat(result[nextid], space);
	    DpsUniStrCat(result[nextid], otv);
/*
    # check to see whether there is duplicated path
    # if there is a duplicate path, remove the small value path
*/
	    needinsert = 1;
	    iindex = top;
	    father = top;
	    while (iindex != -1) {
	      if (position[iindex] == position[nextid]) {
		if (0.85 * value[iindex] >= value[nextid]) {
		  needinsert = 0;
		} else {
		  if (top == iindex) {
		    next[nextid] = next[iindex];
		    top = nextid;
		    needinsert = 0;
    /*          } else {
	          next[nextid] = next[father];*/ /*  next[father] = next[nextid];*/
		  }
		}
		iindex = -1;
	      } else {
		father = iindex;
		iindex = next[iindex];
	      }
	    }
/*    # insert the new path into the list */
/*	    fprintf(stderr, "current:%d  position:%d  i:%d  value[current]:%.12lf  nextid:%d  value[nextid]:%.12lf\n", 
		    current, position[current], i, value[current], nextid, value[nextid]);*/
	    if (needinsert == 1) {
	      /* iindex is always -1 when the scan above finishes, so this loop
	         body never runs and the new path is either made the head (empty
	         list) or linked after `father`. */
	      while ((iindex != -1) && (value[iindex] > value[nextid])) {
		father = iindex;
		iindex = next[iindex];
	      }
	      if (top == iindex) {
		next[nextid] = top;
		top = nextid;
	      } else {
		next[father] = nextid;
		next[nextid] = iindex;
	      }
	    }
	    nextid++;
	    /* grow all four arrays together, 128 slots at a time */
	    if (nextid >= maxid) {
	      maxid +=128;
	      position = (int*)DpsRealloc(position, maxid * sizeof(int));
	      next = (int*)DpsRealloc(next, maxid * sizeof(int));
	      value = (double*)DpsRealloc(value, maxid * sizeof(double));
	      result = (dpsunicode_t **)DpsRealloc(result, maxid * sizeof(dpsunicode_t *));
	      if (position == NULL || next == NULL || value == NULL || result == NULL) {
		DPS_FREE(position); DPS_FREE(next); DPS_FREE(value);
		if (result != NULL) {
		  for (i = 0; i < nextid; i++) {
		    if (i != top) DPS_FREE(result[i]);
		  }
		  DPS_FREE(result);
		}
		return NULL;
	      }
	    }
	  }

	} /*while ((i >= 1) && ( chinaword == NULL));*/


	DPS_FREE(otv);
      }
    }

    /* Keep only the text of the head path; release everything else. */
    DPS_FREE(position); DPS_FREE(next);
    for (i = 0; i < nextid; i++) {
      if (i != top) DPS_FREE(result[i]);
    }
    otv = result[top];
    DPS_FREE(value); DPS_FREE(result);
    return otv;

  } else {
    return (dpsunicode_t*)DpsUniDup(line);
  }
}

Exemple #14

0

Afficher le fichier

Fichier : unicode.c Projet : github188/SimpleCode

/* String append: copies `append`, terminator included, onto the end of `s`.
   `s` must have room for the combined string. Returns `s`. */
dpsunicode_t *DpsUniStrCat(dpsunicode_t *s, const dpsunicode_t *append) {
  dpsunicode_t *tail = s + DpsUniLen(s);

  DpsUniStrCpy(tail, append);
  return s;
}

Exemple #15

0

Afficher le fichier

Fichier : boolean.c Projet : github188/SimpleCode

/* Store an operator entry (no word attached) with command `cmd` in the next
   free slot of Res->items and count it in both nitems and ncmds. The caller
   must already have made sure the slot exists. */
static void DpsPushOperatorItem(DPS_RESULT *Res, DPS_PREPARE_STATE *state, int cmd) {
  DPS_STACK_ITEM *item = &Res->items[Res->nitems];

  item->cmd = cmd;
  item->order = 0;
  item->origin = 0;
  item->count = 0;
  item->len = 0;
  item->crcword = 0;
  item->word = NULL;
  item->ulen = 0;
  item->uword = NULL;
  item->pbegin = NULL;
  item->order_origin = 0;
  item->secno = state->secno[state->p_secno];
  Res->nitems++;
  Res->ncmds++;
}

/**
 * Append the item described by `state` (a command, optionally with a word) to
 * the item stack of `Res`.
 *
 * - A word that is a stop word, or whose length lies outside the configured
 *   minimum/maximum, gets DPS_WORD_ORIGIN_STOP added to its origin.
 * - A word not originating from the query itself is skipped if an item with
 *   the same order and hash is already present.
 * - An OR/AND/NEAR/ANYWORD command directly after another such command is
 *   dropped.
 * - A word following a word or a closing bracket gets an implicit OR inserted
 *   before it; an opening bracket following a closing bracket gets
 *   state->add_cmd inserted before it.
 *
 * @param query  agent, used for configuration, stop list and logging.
 * @param Res    result whose item stack is extended.
 * @param state  parser state: command, order, origin, section.
 * @param word   word in the local charset, or NULL; it is duplicated.
 * @param uword  the same word as unicode, or NULL; it is duplicated.
 * @return DPS_OK, or DPS_ERROR if the stack could not be enlarged.
 */
int DpsAddStackItem(DPS_AGENT *query, DPS_RESULT *Res, DPS_PREPARE_STATE *state, char *word, dpsunicode_t *uword) {
  int origin;
  size_t      i; 
  size_t wlen = (uword == NULL) ? 0 : DpsUniLen(uword);
  dpshash32_t crcword = (word == NULL) ? 0 : DpsStrHash32(word);
  DPS_STACK_ITEM *item;

#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "0[%d].%x %c -- %s [%x] .secno:%d\n", state->order, state->origin, item_type(state->cmd), 
	 (word == NULL) ? "<NULL>" : word, crcword, state->secno[state->p_secno]);
#endif

  if((uword != NULL) && ( DpsStopListFind(&query->Conf->StopWords, uword, state->qlang) ||
			  (query->WordParam.min_word_len > wlen) ||
			  (query->WordParam.max_word_len < wlen)) ) {

    origin = state->origin | DPS_WORD_ORIGIN_STOP;
  } else {
    origin = state->origin;
  }

  /* Words that did not come from the query text are stored only once. */
  if (state->cmd == DPS_STACK_WORD && !(origin & DPS_WORD_ORIGIN_QUERY)) {
    for (i = 0; i < Res->nitems; i++) {
      if ((Res->items[i].order == state->order) && (Res->items[i].crcword == crcword)) return DPS_OK;
    }
  }

  /* Up to two slots are consumed below (an implicit operator plus the item
     itself). Written as an addition so that a capacity below 2 cannot wrap
     around when mitems is unsigned. */
  if (Res->nitems + 2 >= Res->mitems) {
    Res->mitems += DPS_MAXSTACK;
    Res->items = (DPS_STACK_ITEM*)DpsRealloc(Res->items, Res->mitems * sizeof(DPS_STACK_ITEM));
    if (Res->items == NULL) {
      /* the arguments are cast so that they match the %d conversions */
      DpsLog(query, DPS_LOG_ERROR, "Can't alloc %d bytes for %d mitems", (int)(Res->mitems * sizeof(DPS_STACK_ITEM)), (int)Res->mitems);
      return DPS_ERROR;
    }
  }
      
  if (Res->nitems > 0) {
    /* Never store two binary operators in a row. */
    if (state->cmd == DPS_STACK_OR || state->cmd == DPS_STACK_AND || state->cmd == DPS_STACK_NEAR || state->cmd == DPS_STACK_ANYWORD) {
      if (Res->items[Res->nitems-1].cmd == DPS_STACK_AND || Res->items[Res->nitems-1].cmd == DPS_STACK_OR 
	  || Res->items[Res->nitems-1].cmd == DPS_STACK_NEAR ||  Res->items[Res->nitems-1].cmd == DPS_STACK_ANYWORD) {
	return DPS_OK;
      }
    }

    /* word after word / ")" : insert an implicit OR */
    if ((state->cmd == DPS_STACK_WORD) 
	&& (
	    (Res->items[Res->nitems-1].cmd == DPS_STACK_WORD)
	    || (Res->items[Res->nitems-1].cmd == DPS_STACK_RIGHT)
	    || (Res->items[Res->nitems-1].cmd == DPS_STACK_PHRASE_RIGHT)
	    )) {
      DpsPushOperatorItem(Res, state, DPS_STACK_OR);
#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d].%x %c -- %s", 0, 0, item_type(DPS_STACK_OR), "<NULL>");
#endif
    }
    /* "(" after ")" : insert the operator configured in state->add_cmd */
    if ((state->cmd == DPS_STACK_LEFT) 
	&& (
	    (Res->items[Res->nitems-1].cmd == DPS_STACK_RIGHT)
	    || (Res->items[Res->nitems-1].cmd == DPS_STACK_PHRASE_RIGHT)
	    )) {
      DpsPushOperatorItem(Res, state, state->add_cmd);
#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d].%x %c -- %s", 0, 0, item_type(state->add_cmd), "<NULL>");
#endif
    }
  }

  item = &Res->items[Res->nitems];
  item->cmd = state->cmd;
  item->order = state->order;
  item->order_inquery = state->order_inquery;
  item->origin = origin;
  item->count = 0;
  item->len = (word == NULL) ? 0 : dps_strlen(word);
  item->crcword = crcword;
  item->word = (word == NULL) ? NULL : DpsStrdup(word);
  item->ulen = wlen;
  item->uword = (uword == NULL) ? NULL : DpsUniDup(uword);
  item->pbegin = NULL;
  item->order_origin = 0;
  item->wordnum = Res->nitems;
  item->secno = state->secno[state->p_secno];
  Res->nitems++;
  if (state->cmd != DPS_STACK_WORD) {
    Res->ncmds++;
  } else {
    /* NOTE(review): indexes items[] by state->order, not by the slot just
       filled - presumably order < nitems always holds; confirm. */
    Res->items[state->order].order_origin |= origin;
    if (state->order > Res->max_order) Res->max_order = state->order;
    /* track the largest order_inquery (state->order was stored here before) */
    if (state->order_inquery > Res->max_order_inquery) Res->max_order_inquery = state->order_inquery;
  }
#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d,%d].%x %c -- %s", state->order, state->order_inquery, state->origin, item_type(state->cmd), 
	 (word == NULL) ? "<NULL>" : word);
#endif

  return DPS_OK;
}

Exemple #16

0

Afficher le fichier

Fichier : sea.c Projet : BackupGGCode/dataparksearch

/* Build the "sea" section of a document from its excerpt.

   1. Any existing value of the "sea" section is cleared.
   2. excerpt->data is split into sentences with DpsUniStrTok_SEA(). Distinct
      sentences of at least Indexer->Flags.SEASentenceMinLength characters are
      collected, at most Indexer->Flags.SEASentences of them; once the list is
      full, a longer new sentence replaces the shortest one kept. A language
      map is built for the lower-cased text of every kept sentence.
   3. With fewer than 4 sentences nothing more is done.
   4. links[i * nitems + j] is filled from the hit counts DpsCheckLangMap6()
      reports for each pair of sentence maps (for i == j, against
      Doc->lang_cs_map when that is set, 0.0 otherwise).
   5. Each sentence gets di = f(sum of links * Oi), clamped to
      [LOW_BORDER_EPS2, HI_BORDER_EPS2]; the sentences are sorted with SentCmp,
      the first TOP_SENTENCES are re-sorted with SentOrderCmp and passed to
      DpsPrepareItem() under the section name "sea".

   Returns DPS_ERROR only when the sentence array cannot be enlarged; every
   other path, including a failed allocation of links/lang_cs, returns DPS_OK. */
int DpsSEAMake(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *excerpt,  
	       const char *content_lang, size_t *indexed_size, size_t *indexed_limit, 
	       size_t max_word_len, size_t min_word_len, int crossec, int seasec
#ifdef HAVE_ASPELL
	       , int have_speller, AspellSpeller *speller
#endif
	       ) {
  DPS_SENTENCELIST List;
  DPS_MAPSTAT MapStat;
  DPS_TEXTITEM Item;
  DPS_VAR	*Sec;
  dpsunicode_t *sentence, *lt, savec;
  double *links, *lang_cs, w;
  double delta, pdiv, cur_div;
  size_t l, sent_len, order;
  size_t min_len = 10000000, min_pos = 0;
  int  it;
  register size_t i, j;
#ifdef DEBUG
  char lcstr[4096];

#endif

  TRACE_IN(Indexer, "DpsSEAMake");

  if((Sec = DpsVarListFind(&Doc->Sections, "sea"))) { /* set SEA section to NULL */
    DPS_FREE(Sec->val);
    DPS_FREE(Sec->txt_val);
    Sec->curlen = 0;
  }
  
  bzero(&List, sizeof(List));
  order = 0;
  sentence = DpsUniStrTok_SEA((dpsunicode_t*)excerpt->data, &lt);
  while(sentence) {
    /* terminate the sentence in place; the saved character is put back at
       the bottom of the loop */
    if (lt != NULL) { savec = *lt; *lt = 0; }
#ifdef DEBUG
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)sentence, sizeof(dpsunicode_t) * (DpsUniLen(sentence) + 1));
    fprintf(stderr, "Sentence.%d: %s\n", List.nitems, lcstr);
#endif
    if ((sent_len = DpsUniLen(sentence)) >= Indexer->Flags.SEASentenceMinLength) {
      /* j is used as a flag: 1 = not seen before, 0 = duplicate */
      j = 1;
      for (i = 0; i < List.nitems; i++) {
	if (DpsUniStrCmp(sentence, List.Sent[i].sentence) == 0) {
	  j = 0; break;
	}
      }
      if (j) {
	if ( List.nitems < Indexer->Flags.SEASentences ) {
	  /* still room: append, growing the array 16 entries at a time */
	  if (List.nitems == List.mitems) {
	    List.mitems += 16;
	    List.Sent = (DPS_SENTENCE*)DpsRealloc(List.Sent, List.mitems * sizeof(DPS_SENTENCE));
	    /* NOTE(review): this return skips restoring *lt and releasing the
	       sentence copies collected so far - confirm whether that matters. */
	    if (List.Sent == NULL) { TRACE_OUT(Indexer); return DPS_ERROR;}
	  }
	  List.Sent[List.nitems].sentence = DpsUniDup(sentence);
	  List.Sent[List.nitems].len = sent_len;
	  List.Sent[List.nitems].order = order++;
	  /* `sentence` is re-pointed at a temporary lower-cased copy that is
	     freed below; the tokenizer is later resumed with NULL, so the
	     original pointer is not needed again.
	     NOTE(review): neither DpsUniDup() result is checked for NULL. */
	  sentence = DpsUniDup(sentence);
	  DpsUniStrToLower(sentence);
	  bzero(&List.Sent[List.nitems].LangMap, sizeof(DPS_LANGMAP));
	  DpsBuildLangMap(&List.Sent[List.nitems].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0);
	  if (sent_len < min_len) { min_len = sent_len; min_pos = List.nitems; }
	  List.nitems++;
	  DPS_FREE(sentence);
	} else if (sent_len > min_len) {
	  /* list is full: overwrite the shortest kept sentence, then find the
	     new shortest one */
	  DPS_FREE(List.Sent[min_pos].sentence);
	  List.Sent[min_pos].sentence = DpsUniDup(sentence);
	  List.Sent[min_pos].len = sent_len;
	  List.Sent[min_pos].order = order++;
	  sentence = DpsUniDup(sentence);
	  DpsUniStrToLower(sentence);
	  bzero(&List.Sent[min_pos].LangMap, sizeof(DPS_LANGMAP));
	  DpsBuildLangMap(&List.Sent[min_pos].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0);
	  DPS_FREE(sentence);
	  min_len = List.Sent[0].len; min_pos = 0;
	  for(i = 1; i < List.nitems; i++) if (List.Sent[i].len < min_len) { min_len = List.Sent[i].len; min_pos = i; }
	}
      }
    }
#ifdef DEBUG
    fprintf(stderr, "Sent. len.:%d, Min.allowed: %d\n", sent_len, Indexer->Flags.SEASentenceMinLength);
#endif
    if (lt != NULL) *lt = savec;
    sentence = DpsUniStrTok_SEA(NULL, &lt);
  }
  DpsLog(Indexer, DPS_LOG_DEBUG, "SEA sentences: %d", List.nitems);
  /* too few sentences to rank: leave the section empty */
  if (List.nitems < 4) {
    for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence);
    DPS_FREE(List.Sent); 
    TRACE_OUT(Indexer);
    return DPS_OK; 
  }

  /* links is an nitems x nitems matrix stored row by row; lang_cs is
     allocated and freed but not otherwise used in this function */
  links = (double*)DpsMalloc(sizeof(double) * List.nitems * List.nitems);
  lang_cs = (double*)DpsMalloc(sizeof(double) * List.nitems);
/*
        k                 ot
  links[i * List.nitems + j] 
*/

  if (links != NULL && lang_cs != NULL) {

    for (i = 0; i < List.nitems; i++) {
      DpsPrepareLangMap(&List.Sent[i].LangMap);
    }

    for (i = 0; i < List.nitems; i++) {
      List.Sent[i].Oi =  List.Sent[i].di = 0.5;
      /* diagonal entry: sentence against the document's own map, if any */
      if (Doc->lang_cs_map == NULL) {
	  links[i * List.nitems + i] = 0.0;
      } else {
	MapStat.map = &List.Sent[i].LangMap;
	DpsCheckLangMap6(Doc->lang_cs_map, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT);
	links[i * List.nitems + i] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1);
      }
#ifdef DEBUG
      /* NOTE(review): MapStat has not been filled in at this point when
         Doc->lang_cs_map is NULL and i == 0. */
      DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, i, links[i * List.nitems + i], MapStat.hits, MapStat.miss);
#endif
      /* off-diagonal entries: sentence i against every other sentence j */
      for (j = 0; j < List.nitems; j++) {
	  if (j == i) continue;
	MapStat.map = &List.Sent[j].LangMap;
	DpsCheckLangMap6(&List.Sent[j].LangMap, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT);

	links[i * List.nitems + j] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1);
#ifdef DEBUG
	DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, j, links[i * List.nitems + j], MapStat.hits, MapStat.miss);
#endif
      }
    }

    /* one scoring pass: di = f(row l of links weighted by Oi), clamped */
    for (l = 0; l < List.nitems; l++) {
	w = 0.0;
	for (i = 0; i < List.nitems; i++) { 
	    w += links[l * List.nitems + i] * List.Sent[i].Oi;
	}
	w = f(w);
	if (w < LOW_BORDER_EPS2) w = LOW_BORDER_EPS2;
	else if (w > HI_BORDER_EPS2) w = HI_BORDER_EPS2;
	List.Sent[l].di = w;
    }

    DpsSort(List.Sent, List.nitems, sizeof(DPS_SENTENCE), (qsort_cmp)SentCmp);

#ifdef DEBUG
    /* NOTE(review): only nitems >= 4 is guaranteed here, so the Sent[4]
       dump below may read past the list - confirm. */
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[0].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[0].sentence) + 1));
    fprintf(stderr, "Sent.0: %f %f -- %s\n", List.Sent[0].di, List.Sent[0].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[1].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[1].sentence) + 1));
    fprintf(stderr, "Sent.1: %f %f -- %s\n", List.Sent[1].di, List.Sent[1].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[2].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[2].sentence) + 1));
    fprintf(stderr, "Sent.2: %f %f -- %s\n", List.Sent[2].di, List.Sent[2].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[3].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[3].sentence) + 1));
    fprintf(stderr, "Sent.3: %f %f -- %s\n", List.Sent[3].di, List.Sent[3].Oi, lcstr);
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[4].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[4].sentence) + 1));
    fprintf(stderr, "Sent.4: %f %f -- %s\n", List.Sent[4].di, List.Sent[4].Oi, lcstr);
#endif
    /* NOTE(review): assumes TOP_SENTENCES <= List.nitems (at least 4 here);
       TOP_SENTENCES is defined outside this block - confirm. */
    DpsSort(List.Sent, TOP_SENTENCES, sizeof(DPS_SENTENCE), (qsort_cmp)SentOrderCmp);

    bzero(&Item, sizeof(Item));
    Item.section = seasec;
    Item.href = NULL;
    Item.section_name = "sea";
    for (i = 0; i < TOP_SENTENCES; i++) {
      /* DpsPrepareItem() receives both the sentence and a scratch copy */
      dpsunicode_t *UStr = DpsUniDup(List.Sent[i].sentence);
      DpsPrepareItem(Indexer, Doc, &Item, List.Sent[i].sentence, UStr, content_lang, indexed_size, indexed_limit,
		     max_word_len, min_word_len, crossec
#ifdef HAVE_ASPELL
		     , have_speller, speller, NULL
#endif
		     );
      DPS_FREE(UStr);
    }
  }
  DPS_FREE(lang_cs);
  DPS_FREE(links);
  for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence);
  DPS_FREE(List.Sent);

  TRACE_OUT(Indexer);
  return DPS_OK;
}