static void DpsChineseListAdd(DPS_CHINALIST *List, DPS_CHINAWORD * chinaword){ dpsunicode_t *nfc = DpsUniNormalizeNFC(NULL, chinaword->word); DPS_CHINAWORD *F = DpsChineseListFind(List, nfc); if (F != NULL) { F->freq += chinaword->freq; List->total += chinaword->freq; DPS_FREE(nfc); return; } { DPS_CHINAWORD cw = *chinaword; cw.word = nfc; DpsChineseListAddBundle(List, &cw); } if (List->nwords > 1) DpsChineseSortForLast(List->ChiWord, List->nwords); }
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){ struct stat sb; char *str, *data = NULL, *cur_n = NULL; char lang[64]=""; DPS_CHARSET *cs=NULL; DPS_CHARSET *sys_int=DpsGetCharSet("sys-int"); DPS_CONV file_uni; DPS_WIDEWORD *ww = NULL; size_t key = 1; int flag_th = 0; int fd; char savebyte; if (stat(filename, &sb)) { fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno)); return DPS_ERROR; } if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) { dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno)); return DPS_ERROR; } if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) { dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", sb.st_size); DpsClose(fd); return DPS_ERROR; } if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) { dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno)); DPS_FREE(data); DpsClose(fd); return DPS_ERROR; } data[sb.st_size] = '\0'; str = data; cur_n = strchr(str, '\n'); if (cur_n != NULL) { cur_n++; savebyte = *cur_n; *cur_n = '\0'; } while(str != NULL) { if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue; if(!strncasecmp(str,"Charset:",8)){ char * lasttok; char * charset; if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) { cs=DpsGetCharSet(charset); if(!cs){ dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'", charset, filename); DPS_FREE(data); DpsClose(fd); return DPS_ERROR; } DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0); } }else if(!strncasecmp(str,"Language:",9)){ char * lasttok; char * l; if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) { dps_strncpy(lang, l, sizeof(lang)-1); } }else if(!strncasecmp(str, "Thesaurus:", 10)) { char * lasttok; char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok); flag_th = (strncasecmp(tok, "yes", 3) == 0) ? 1 : 0; }else{ char *av[255]; size_t ac, i, j; dpsunicode_t *t; if(!cs){ dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename); DpsClose(fd); DPS_FREE(data); return DPS_ERROR; } if(!lang[0]){ dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename); DpsClose(fd); DPS_FREE(data); return DPS_ERROR; } ac = DpsGetArgs(str, av, 255); if (ac < 2) goto loop_continue; if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) return DPS_ERROR; for (i = 0; i < ac; i++) { ww[i].word = av[i]; ww[i].len = dps_strlen(av[i]); ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t)); if (ww[i].uword == NULL) return DPS_ERROR; DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1); DpsUniStrToLower(ww[i].uword); ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword); DPS_FREE(t); } for (i = 0; i < ac - 1; i++) { for (j = i + 1; j < ac; j++) { if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){ Env->Synonyms.msynonyms += 64; Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms); if (Env->Synonyms.Synonym == NULL) { Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0; return DPS_ERROR; } } bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM)); /* Add direct order */ Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword); Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword); Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0); Env->Synonyms.nsynonyms++; bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM)); /* Add reverse order */ Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword); Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword); Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0); Env->Synonyms.nsynonyms++; } } for (i = 0; i < ac; i++) { DPS_FREE(ww[i].uword); } do { key++; } while (key == 0); } loop_continue: str = cur_n; if (str != NULL) { *str = savebyte; cur_n = strchr(str, '\n'); if (cur_n != NULL) { cur_n++; savebyte = *cur_n; *cur_n = '\0'; } } } DPS_FREE(data); DPS_FREE(ww); DpsClose(fd); return DPS_OK; }