/*
 * Close a base previously set up in *P.
 *
 * Releases the file locks and the per-file base lock (skipped when the
 * agent's cold_var flag is set or nothing is locked), closes both
 * descriptors if the base is open, and frees the two stored file names.
 *
 * Always returns DPS_OK.
 */
__C_LINK int __DPSCALL DpsBaseClose(DPS_BASE_PARAM *P) {
  TRACE_IN(P->A, "DpsBaseClose");

  /* An fsync of Sfd/Ifd for bases opened with DPS_WRITE_LOCK used to live
     here; it is intentionally left disabled. */

  if (P->locked) {
    if (!P->A->Flags.cold_var) {
      DpsUnLock(P->Sfd);
      DpsUnLock(P->Ifd);
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
      P->locked = 0;
    }
  }

  if (P->opened) {
    DpsClose(P->Sfd);
    DpsClose(P->Ifd);
    P->opened = 0;
  }

  DPS_FREE(P->Ifilename);
  DPS_FREE(P->Sfilename);

  TRACE_OUT(P->A);
  return DPS_OK;
}
/*
 * Write the "<lim_name>.dat" / "<lim_name>.ind" pair for a list keyed by a
 * (hi, lo) pair.
 *
 * The list is sorted with cmp_ind8, then:
 *   - the .dat file receives the url_id of every item, in sorted order;
 *   - the .ind file receives one DPS_UINT8_POS_LEN record per run of items
 *     sharing the same (hi, lo), holding the byte offset and byte length of
 *     that run inside the .dat file.
 *
 * Both files are created under <vardir>/DPS_TREEDIR, where vardir is
 * db->vardir if set, otherwise the "VarDir" configuration variable.
 *
 * L is always emptied with ClearIndex8() before returning, except when
 * L->Item is NULL on entry.
 *
 * Returns 0 on success, 1 on any failure (including an empty L->Item).
 */
static int MakeNestedIndex(DPS_AGENT *Indexer, DPS_UINT8URLIDLIST *L, const char *lim_name, DPS_DB *db) {
  DPS_ENV *Conf = Indexer->Conf;
  size_t k, prev;
  urlid_t *data = NULL;
  DPS_UINT8_POS_LEN *ind = NULL;
  size_t mind = 1000, nind = 0, ndata;
  char fname[PATH_MAX];
  /* -1 means "not open"; 0 is a valid descriptor and must not be used as a sentinel */
  int dat_fd = -1, ind_fd = -1;
  int list_cleared = 0;
  const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);

  if (!L->Item) return(1);

  if (L->nitems > 1) DpsSort(L->Item, L->nitems, sizeof(DPS_UINT8URLID), (qsort_cmp)cmp_ind8);

  data = (urlid_t*)DpsMalloc((L->nitems + 1) * sizeof(urlid_t));
  if (!data) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]",
           (int)((L->nitems + 1) * sizeof(urlid_t)), __FILE__, __LINE__);
    goto err1;
  }
  ind = (DPS_UINT8_POS_LEN*)DpsMalloc(mind * sizeof(DPS_UINT8_POS_LEN));
  if (!ind) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]",
           (int)(mind * sizeof(DPS_UINT8_POS_LEN)), __FILE__, __LINE__);
    goto err1;
  }

  /* prev is the first item of the run currently being collected */
  prev = 0;
  for (k = 0; k < L->nitems; k++) {
    data[k] = L->Item[k].url_id;

    /* The run continues while the next item carries the same key.  Closing
       the run on "next differs or no next" (instead of "current differs or
       is last") gives a trailing item with a new key its own record rather
       than folding it into the preceding run. */
    if ((k + 1 < L->nitems)
        && (L->Item[k + 1].hi == L->Item[prev].hi)
        && (L->Item[k + 1].lo == L->Item[prev].lo)) {
      continue;
    }

    if (nind == mind) {
      mind += 1000;
      /* NOTE(review): result is assigned in place as before; what DpsRealloc
         does with the old block on failure is not visible here. */
      ind = (DPS_UINT8_POS_LEN*)DpsRealloc(ind, mind * sizeof(DPS_UINT8_POS_LEN));
      if (!ind) {
        DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]",
               (int)(mind * sizeof(DPS_UINT8_POS_LEN)), __FILE__, __LINE__);
        goto err1;
      }
    }

    /* Fill index */
    ind[nind].hi = L->Item[prev].hi;
    ind[nind].lo = L->Item[prev].lo;
    ind[nind].pos = prev * sizeof(*data);
    ind[nind].len = (k - prev + 1) * sizeof(*data);
    DpsLog(Indexer, DPS_LOG_DEBUG, "%08X%08X - %d %d\n",
           ind[nind].hi, ind[nind].lo, (int)ind[nind].pos, ind[nind].len);
    nind++;
    prev = k + 1;
  }

  /* The item count is saved before the list is emptied */
  ndata = L->nitems;
  ClearIndex8(L);
  list_cleared = 1;

  dps_snprintf(fname, sizeof(fname) - 1, "%s%c%s%c%s.dat", vardir, DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
  if ((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsWriteLock(dat_fd);
  if ((ndata * sizeof(*data)) != (size_t)write(dat_fd, data, ndata * sizeof(*data))) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsUnLock(dat_fd);
  DpsClose(dat_fd);
  dat_fd = -1;            /* closed: the error path must not close it again */
  DPS_FREE(data);

  dps_snprintf(fname, sizeof(fname) - 1, "%s%c%s%c%s.ind", vardir, DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
  if ((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsWriteLock(ind_fd);
  if ((nind * sizeof(DPS_UINT8_POS_LEN)) != (size_t)write(ind_fd, ind, nind * sizeof(DPS_UINT8_POS_LEN))) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsUnLock(ind_fd);
  DpsClose(ind_fd);
  ind_fd = -1;
  DPS_FREE(ind);

  return(0);

err1:
  if (!list_cleared) ClearIndex8(L);
  DPS_FREE(data);
  DPS_FREE(ind);
  if (dat_fd >= 0) DpsClose(dat_fd);
  if (ind_fd >= 0) DpsClose(ind_fd);
  return(1);
}
static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) { DPS_ENV *Conf = Indexer->Conf; DPS_UINT4URLIDLIST L; size_t k,prev; urlid_t *data = NULL; DPS_UINT4_POS_LEN *ind=NULL; size_t mind=1000,nind=0; char fname[PATH_MAX]; int dat_fd=0, ind_fd=0, rc; const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR); bzero(&L, sizeof(DPS_UINT4URLIDLIST)); rc = DpsLimit4(Indexer, &L, field, type, db); if(rc != DPS_OK) { DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__); goto err1; } if(!L.Item)return(1); if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4); data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data)); if(!data) { fprintf(stderr,"Error1: %s\n",strerror(errno)); goto err1; } ind=(DPS_UINT4_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error2: %s\n",strerror(errno)); goto err1; } prev=0; for(k=0; k<L.nitems; k++) { data[k]=L.Item[k].url_id; if((k==L.nitems-1) || (L.Item[k].val!=L.Item[prev].val)) { if(nind==mind) { mind+=1000; ind=(DPS_UINT4_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error3: %s\n",strerror(errno)); goto err1; } } /* Fill index */ ind[nind].val=L.Item[prev].val; ind[nind].pos = prev * sizeof(*data); if (k == L.nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data); else ind[nind].len = (k - prev) * sizeof(*data); DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len); nind++; prev=k; } } if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } 
dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(dat_fd); if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(dat_fd); DpsClose(dat_fd); DPS_FREE(data); dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(ind_fd); if((nind*sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT4_POS_LEN))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(ind_fd); DpsClose(ind_fd); DPS_FREE(ind); return(0); err1: if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } DPS_FREE(data); DPS_FREE(ind); if(dat_fd) DpsClose(dat_fd); if(ind_fd) DpsClose(ind_fd); return(1); }
int main(int argc,char **argv, char **envp) { int ch, sleeps = 1, optimize = 0, obi = 0; unsigned int from = 0, to = 0xFFF, p_to = 0; DPS_ENV * Env; const char * config_name = DPS_CONF_DIR "/cached.conf"; DpsInit(argc, argv, envp); /* Initialize library */ DpsInitMutexes(); Env=DpsEnvInit(NULL); if (Env == NULL) exit(1); DpsSetLockProc(Env, DpsLockProc); /*#ifndef HAVE_SETPROCTITLE*/ ARGV = argv; ARGC = argc; /*#endif*/ while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){ switch (ch) { case 'f': sscanf(optarg, "%x", &from); break; case 't': sscanf(optarg, "%x", &p_to); break; case 'w': DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg); break; case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break; case 'b': obi++; break; case 'o': optimize++; break; case 'p': sleeps = atoi(optarg); break; case 'h': case '?': default: usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; break; } } argc -= optind; argv += optind; if(argc > 1) { usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; } else if (argc == 1) { config_name = argv[0]; } { DPS_LOGDEL *del_buf=NULL; size_t del_count = 0, log, bytes, n = 0; int dd, log_fd; struct stat sb; char dname[PATH_MAX] = ""; DPS_BASE_PARAM P; DPS_LOGWORD *log_buf = NULL; DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0); log2stderr = 1; if (Indexer == NULL) { fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__); exit(DPS_ERROR); } if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){ fprintf(stderr, "%s\n", DpsEnvErrMsg(Env)); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return DPS_ERROR; } DpsOpenLog("splitter", Env, log2stderr); Indexer->flags = Env->flags = DPS_FLAG_UNOCON; DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*"); bzero(&P, sizeof(P)); P.subdir = DPS_TREEDIR; P.basename = "wrd"; P.indname = "wrd"; P.mode = DPS_WRITE_LOCK; P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300); P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, 
"VarDir", DPS_VAR_DIR)); P.A = Indexer; if (p_to != 0) to = p_to; else to = P.NFiles - 1; #ifdef HAVE_ZLIB P.zlib_method = Z_DEFLATED; P.zlib_level = 9; P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS; P.zlib_memLevel = 9; P.zlib_strategy = DPS_BASE_WRD_STRATEGY; #endif /* Open del log file */ dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH); if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) { dps_strerror(NULL, 0, "Can't open del log '%s'", dname); exit(DPS_ERROR); } DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles); /* Allocate del buffer */ fstat(dd, &sb); if (sb.st_size != 0) { del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1); if (del_buf == NULL) { fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__); exit(0); } del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL); } DpsClose(dd); /* Remove duplicates URLs in DEL log */ /* Keep only oldest records for each URL */ if (del_count > 0) { DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count); if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups"); del_count = DpsRemoveDelLogDups(del_buf, del_count); } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to); for(log = from; log <= to; log++) { /* Open log file */ dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log); if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){ if (errno == ENOENT) { dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname); n = 0; /* continue;*/ } else { dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname); continue; } } else { DpsWriteLock(log_fd); DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log); fstat(log_fd, &sb); log_buf = (sb.st_size > 0) ? 
(DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL; if (log_buf != NULL) { unlink(dname); bytes = read(log_fd,log_buf,(size_t)sb.st_size); (void)ftruncate(log_fd, (off_t)0); DpsUnLock(log_fd); DpsClose(log_fd); n = bytes / sizeof(DPS_LOGWORD); DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords"); n = DpsRemoveOldWords(log_buf, n, del_buf, del_count); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd); } else { n = 0; DpsUnLock(log_fd); DpsClose(log_fd); } } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize); if (obi) DpsBaseOptimize(&P, log); DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count); if (optimize) DpsBaseOptimize(&P, log); DpsBaseClose(&P); DPS_FREE(log_buf); DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to); DPSSLEEP(sleeps); } DPS_FREE(del_buf); DpsAgentFree(Indexer); DPS_FREE(P.vardir); } fprintf(stderr, "Splitting done.\n"); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return 0; }
/*
 * Load a spelling dictionary file into Conf->Spells.
 *
 * The whole file is read into memory and processed line by line.  Each line
 * is "word" or "word/FLAGS"; FLAGS is cut at the first character that is
 * not an ASCII letter.  The word is converted from `charset` to the internal
 * "sys-int" charset, lower-cased and added with DpsSpellAdd().
 *
 *   lang           language passed through to DpsSpellAdd()
 *   charset        charset name of the dictionary file
 *   filename       path of the dictionary file
 *   skip_noflag    when non-zero, lines without "/FLAGS" are ignored
 *   first_letters  when non-empty, only words whose first byte (converted
 *                  back to `charset`) occurs in this string are loaded
 *
 * When Conf->Flags.use_accentext is set, the accent-stripped form is added
 * too if it differs, and for languages starting with "de" so is the form
 * returned by DpsUniGermanReplace().
 *
 * Returns DPS_OK on success, DPS_ERROR on allocation, charset or file
 * errors, or the failing DpsSpellAdd() result.
 */
__C_LINK int __DPSCALL DpsImportDictionary(DPS_ENV * Conf, const char *lang, const char *charset,
                                           const char *filename, int skip_noflag, const char *first_letters) {
  struct stat sb;
  char *str, *data = NULL, *cur_n = NULL;
  char *lstr = NULL;
  dpsunicode_t *ustr = NULL;
  DPS_CHARSET *sys_int;
  DPS_CHARSET *dict_charset;
  DPS_CONV touni;
  DPS_CONV fromuni;
  int fd = -1;
  int rc = DPS_ERROR;
  char savebyte = '\0';

  /* Every exit goes through `done`, which releases whatever was acquired */
  if ((lstr = (char*)DpsMalloc(2048)) == NULL) goto done;
  if ((ustr = (dpsunicode_t*)DpsMalloc(8192)) == NULL) goto done;

  dict_charset = DpsGetCharSet(charset);
  sys_int = DpsGetCharSet("sys-int");
  if ((dict_charset == NULL) || (sys_int == NULL)) goto done;

  DpsConvInit(&touni, dict_charset, sys_int, Conf->CharsToEscape, 0);
  DpsConvInit(&fromuni, sys_int, dict_charset, Conf->CharsToEscape, 0);

  if (stat(filename, &sb)) {
    fprintf(stderr, "Unable to stat dictionary file '%s': %s", filename, strerror(errno));
    goto done;
  }
  /* 0 is a valid descriptor; only a negative value signals failure */
  if ((fd = DpsOpen2(filename, O_RDONLY)) < 0) {
    fprintf(stderr, "Unable to open dictionary file '%s': %s", filename, strerror(errno));
    goto done;
  }
  if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
    fprintf(stderr, "Unable to alloc %ld bytes", (long)sb.st_size);
    goto done;
  }
  if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
    fprintf(stderr, "Unable to read dictionary file '%s': %s", filename, strerror(errno));
    goto done;
  }
  DpsClose(fd);
  fd = -1;

  data[sb.st_size] = '\0';

  /* Lines are isolated in place: the byte following each '\n' is saved in
     savebyte and overwritten with NUL, then restored when moving on. */
  str = data;
  cur_n = strchr(str, '\n');
  if (cur_n != NULL) {
    cur_n++;
    savebyte = *cur_n;
    *cur_n = '\0';
  }

  while (str != NULL) {
    char *s;
    const char *flag;
    int res;

    flag = NULL;

    /* Strip line terminators */
    s = str;
    while (*s) {
      if (*s == '\r') *s = '\0';
      if (*s == '\n') *s = '\0';
      s++;
    }

    if ((s = strchr(str, '/'))) {
      *s = 0;
      s++;
      flag = s;
      /* Flags are ASCII letters only; cut at the first other character */
      while (*s) {
        if (((*s >= 'A') && (*s <= 'Z')) || ((*s >= 'a') && (*s <= 'z'))) s++;
        else {
          *s = 0;
          break;
        }
      }
    } else {
      if (skip_noflag) goto loop_continue;
      flag = "";
    }

    res = DpsConv(&touni, (char*)ustr, 8192, str, 1024);
    DpsUniStrToLower(ustr);

    /* Dont load words if first letter is not required */
    /* It allows to optimize loading at search time    */
    if (*first_letters) {
      DpsConv(&fromuni, lstr, 2048, ((const char*)ustr), (size_t)res);
      if (!strchr(first_letters, lstr[0])) goto loop_continue;
    }

    res = DpsSpellAdd(&Conf->Spells, ustr, flag, lang);
    if (res != DPS_OK) {
      rc = res;
      goto done;
    }

    if (Conf->Flags.use_accentext) {
      dpsunicode_t *af_uwrd = DpsUniAccentStrip(ustr);
      if (DpsUniStrCmp(af_uwrd, ustr) != 0) {
        res = DpsSpellAdd(&Conf->Spells, af_uwrd, flag, lang);
        if (res != DPS_OK) {
          DPS_FREE(af_uwrd);
          rc = res;
          goto done;
        }
      }
      DPS_FREE(af_uwrd);

      if (strncasecmp(lang, "de", 2) == 0) {
        dpsunicode_t *de_uwrd = DpsUniGermanReplace(ustr);
        if (DpsUniStrCmp(de_uwrd, ustr) != 0) {
          res = DpsSpellAdd(&Conf->Spells, de_uwrd, flag, lang);
          if (res != DPS_OK) {
            DPS_FREE(de_uwrd);
            rc = res;
            goto done;
          }
        }
        DPS_FREE(de_uwrd);
      }
    }

  loop_continue:
    /* Advance to the next line, undoing the temporary NUL first */
    str = cur_n;
    if (str != NULL) {
      *str = savebyte;
      cur_n = strchr(str, '\n');
      if (cur_n != NULL) {
        cur_n++;
        savebyte = *cur_n;
        *cur_n = '\0';
      }
    }
  }

  rc = DPS_OK;

done:
  if (fd >= 0) DpsClose(fd);
  DPS_FREE(data);
  DPS_FREE(lstr);
  DPS_FREE(ustr);
  return rc;
}
/*
 * Load a synonyms file into Env->Synonyms.
 *
 * The whole file is read into memory and processed line by line:
 *   - lines starting with '#', space, tab, CR or LF are skipped;
 *   - "Charset: NAME"   selects the charset of the following words;
 *   - "Language: NAME"  sets the language (must be present before words);
 *   - "Thesaurus: yes"  turns thesaurus mode on, any other value turns it off;
 *   - any other line is split with DpsGetArgs() into words; lines with fewer
 *     than two words are ignored.  Each word is converted to the internal
 *     "sys-int" charset, lower-cased and NFC-normalized, and every pair of
 *     words on the line is stored twice, once in each direction.
 *
 * In thesaurus mode both counts of a stored pair hold a per-line key (never
 * 0); otherwise they are 0.
 *
 * Returns DPS_OK on success, DPS_ERROR otherwise.  Except for a stat()
 * failure (reported on stderr) and allocation failures while adding words,
 * a description of the error is left in Env->errstr.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env, const char * filename) {
  struct stat sb;
  char *str, *data = NULL, *cur_n = NULL;
  char lang[64] = "";
  DPS_CHARSET *cs = NULL;
  DPS_CHARSET *sys_int = DpsGetCharSet("sys-int");
  DPS_CONV file_uni;
  DPS_WIDEWORD *ww = NULL;
  size_t key = 1;
  int flag_th = 0;
  int fd;
  int rc = DPS_ERROR;
  char savebyte = '\0';

  if (stat(filename, &sb)) {
    fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
    return DPS_ERROR;
  }
  /* 0 is a valid descriptor; only a negative value signals failure */
  if ((fd = DpsOpen2(filename, O_RDONLY)) < 0) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr) - 1, "Unable to open synonyms file '%s': %s",
                 filename, strerror(errno));
    return DPS_ERROR;
  }
  if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr) - 1, "Unable to alloc %d bytes", (int)sb.st_size);
    DpsClose(fd);
    return DPS_ERROR;
  }
  if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr) - 1, "Unable to read synonym file '%s': %s",
                 filename, strerror(errno));
    DPS_FREE(data);
    DpsClose(fd);
    return DPS_ERROR;
  }
  /* The file is fully in memory; closing now keeps every later exit path
     free of descriptor handling. */
  DpsClose(fd);

  data[sb.st_size] = '\0';

  /* Lines are isolated in place: the byte following each '\n' is saved in
     savebyte and overwritten with NUL, then restored when moving on. */
  str = data;
  cur_n = strchr(str, '\n');
  if (cur_n != NULL) {
    cur_n++;
    savebyte = *cur_n;
    *cur_n = '\0';
  }

  while (str != NULL) {
    if (str[0] == '#' || str[0] == ' ' || str[0] == '\t' || str[0] == '\r' || str[0] == '\n') goto loop_continue;

    if (!strncasecmp(str, "Charset:", 8)) {
      char * lasttok;
      char * charset;
      if ((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
        cs = DpsGetCharSet(charset);
        if (!cs) {
          dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                       charset, filename);
          goto done;
        }
        DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
      }
    } else if (!strncasecmp(str, "Language:", 9)) {
      char * lasttok;
      char * l;
      if ((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
        dps_strncpy(lang, l, sizeof(lang) - 1);
      }
    } else if (!strncasecmp(str, "Thesaurus:", 10)) {
      char * lasttok;
      char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
      /* A directive with no value yields a NULL token; treat it as "off" */
      flag_th = (tok != NULL && strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
    } else {
      char *av[255];
      size_t ac, i, j, m;
      dpsunicode_t *t;

      if (!cs) {
        dps_snprintf(Env->errstr, sizeof(Env->errstr) - 1, "No Charset command in synonyms file '%s'", filename);
        goto done;
      }
      if (!lang[0]) {
        dps_snprintf(Env->errstr, sizeof(Env->errstr) - 1, "No Language command in synonyms file '%s'", filename);
        goto done;
      }

      ac = DpsGetArgs(str, av, 255);
      if (ac < 2) goto loop_continue;

      /* NOTE(review): results of DpsRealloc are assigned in place as before;
         what it does with the old block on failure is not visible here. */
      if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) goto done;

      for (i = 0; i < ac; i++) {
        ww[i].word = av[i];
        ww[i].len = dps_strlen(av[i]);
        ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
        if (ww[i].uword == NULL) {
          /* Release the words already converted on this line */
          for (m = 0; m < i; m++) DPS_FREE(ww[m].uword);
          goto done;
        }
        DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
        DpsUniStrToLower(ww[i].uword);
        /* The normalized copy replaces the conversion buffer */
        ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
        DPS_FREE(t);
      }

      for (i = 0; i < ac - 1; i++) {
        for (j = i + 1; j < ac; j++) {

          /* Two entries are appended per pair, hence the "+ 1" */
          if ((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms) {
            Env->Synonyms.msynonyms += 64;
            Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym,
                                                             sizeof(DPS_SYNONYM) * Env->Synonyms.msynonyms);
            if (Env->Synonyms.Synonym == NULL) {
              Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
              for (m = 0; m < ac; m++) DPS_FREE(ww[m].uword);
              goto done;
            }
          }

          bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));

          /* Add direct order */
          Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword);
          Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword);
          Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count =
            Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
          Env->Synonyms.nsynonyms++;

          bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));

          /* Add reverse order */
          Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword);
          Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword);
          Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count =
            Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
          Env->Synonyms.nsynonyms++;
        }
      }

      for (i = 0; i < ac; i++) {
        DPS_FREE(ww[i].uword);
      }

      /* Next line gets a new key; 0 is skipped because it marks "no thesaurus" */
      do { key++; } while (key == 0);
    }

  loop_continue:
    /* Advance to the next line, undoing the temporary NUL first */
    str = cur_n;
    if (str != NULL) {
      *str = savebyte;
      cur_n = strchr(str, '\n');
      if (cur_n != NULL) {
        cur_n++;
        savebyte = *cur_n;
        *cur_n = '\0';
      }
    }
  }

  rc = DPS_OK;

done:
  DPS_FREE(data);
  DPS_FREE(ww);
  return rc;
}