/*
 * Append one entry to a spell list, growing the backing array in steps of
 * 1024 entries when it is full.
 *
 *   List - list to append to
 *   word - word to store; a copy produced by DpsUniRDup() is kept in the entry
 *   flag - flag string; at most 10 bytes are copied and byte [10] is forced to NUL
 *   lang - language string; at most 5 bytes are copied and byte [5] is forced to NUL
 *
 * Returns DPS_OK on success, DPS_ERROR if the array cannot be grown or the
 * word cannot be duplicated.  After a failed grow the list is left empty
 * (Spell == NULL, nspell == mspell == 0) so that a later call starts over
 * instead of indexing a NULL array.
 *
 * NOTE(review): the fixed offsets assume DPS_SPELL.flag holds at least 11
 * bytes and DPS_SPELL.lang at least 6 - confirm against the struct definition.
 */
int DpsSpellAdd(DPS_SPELLLIST *List, const dpsunicode_t *word, const char *flag, const char *lang) {
  DPS_SPELL *entry;

  if (List->nspell >= List->mspell) {
    List->mspell += 1024; /* was: 1024 * 20 */
    List->Spell = (DPS_SPELL *)DpsXrealloc(List->Spell, List->mspell * sizeof(DPS_SPELL));
    if (List->Spell == NULL) {
      /* Keep the counters consistent with the (now missing) array. */
      List->nspell = List->mspell = 0;
      return DPS_ERROR;
    }
  }

  entry = &List->Spell[List->nspell];

  entry->word = DpsUniRDup(word);
  if (entry->word == NULL) {
    /* Do not count an entry that has no word attached. */
    return DPS_ERROR;
  }

  dps_strncpy(entry->flag, flag, 10);
  dps_strncpy(entry->lang, lang, 5);
  entry->lang[5] = entry->flag[10] = '\0';

  List->nspell++;
  return DPS_OK;
}
/*
 * Connect to a Unix-domain stream socket.
 *
 *   A           - agent used for configuration lookup and logging
 *   unix_socket - socket name; it is expanded into a path with DpsRelVarName()
 *
 * Returns the connected descriptor on success, or DPS_NET_CANT_CONNECT when
 * the expanded path is too long, socket() fails, or connect() fails.
 * The descriptor is closed again when connect() fails, so no descriptor is
 * leaked on the error path.
 */
static int open_socket(DPS_AGENT *A, char *unix_socket) {
  char unix_path[128];
  struct sockaddr_un unix_addr;
  int sockfd;

  /* DpsRelVarName() results of 105 or more are rejected as too long. */
  if (DpsRelVarName(A->Conf, unix_path, sizeof(unix_path), unix_socket) >= 105) {
    DpsLog(A, DPS_LOG_ERROR, "Unix socket name '%s' is too large", unix_path);
    return(DPS_NET_CANT_CONNECT);
  }

  if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
    DpsLog(A, DPS_LOG_ERROR, "unix socket() error %d", errno);
    return(DPS_NET_CANT_CONNECT);
  }
  DpsSockOpt(A, sockfd);

  bzero((void*)&unix_addr, sizeof(unix_addr));
  unix_addr.sun_family = AF_UNIX;
  dps_strncpy(unix_addr.sun_path, unix_path, sizeof(unix_addr.sun_path));

  if (connect(sockfd, (struct sockaddr *)&unix_addr, sizeof (unix_addr))) {
    /* Report first: close() may overwrite the errno that dps_strerror() describes. */
    dps_strerror(A, DPS_LOG_ERROR, "unix socket '%s' connect() error", unix_path);
    close(sockfd);
    return(DPS_NET_CANT_CONNECT);
  }

  return sockfd;
}
int _DpsOpen3(const char *path, int flags, int mode, char *filename, size_t fileline) { register Slot * slot; register size_t count; Slot * fullSlot = NULL; int address; if ( allocationList == NULL ) initialize(); lock(); if ( unUsedSlots < 2 ) { allocateMoreSlots(); } for ( slot = allocationList, count = slotCount ; count > 0; count-- ) { if ( slot->mode == FREE || slot->mode == NOT_IN_USE) { fullSlot = slot; break; } slot++; } if ( !fullSlot ) FE_InternalError("No empty file slot."); fullSlot->fileline = fileline; dps_strncpy(fullSlot->filename, filename, DPS_FILENAMELEN); fullSlot->path = DpsStrdup(path); fullSlot->fd = address = open(path, flags, mode); if (address > 0) { fullSlot->mode = ALLOCATED; unUsedSlots--; } release(); return address; }
/* * This is the memory allocator. When asked to allocate a buffer, allocate * it in such a way that the end of the buffer is followed by an inaccessable * memory page. If software overruns that buffer, it will touch the bad page * and get an immediate segmentation fault. It's then easy to zero in on the * offending code with a debugger. * * There are a few complications. If the user asks for an odd-sized buffer, * we would have to have that buffer start on an odd address if the byte after * the end of the buffer was to be on the inaccessable page. Unfortunately, * there is lots of software that asks for odd-sized buffers and then * requires that the returned address be word-aligned, or the size of the * buffer be a multiple of the word size. An example are the string-processing * functions on Sun systems, which do word references to the string memory * and may refer to memory up to three bytes beyond the end of the string. * For this reason, I take the alignment requests to memalign() and valloc() * seriously, and * * Electric Fence wastes lots of memory. I do a best-fit allocator here * so that it won't waste even more. It's slow, but thrashing because your * working set is too big for a system's RAM is even slower. */ static void * _DpsMemalign(size_t alignment, size_t userSize, const char *filename, size_t fileline) { register Slot *slot, *slot2; register size_t count; Slot * fullSlot = 0; Slot * emptySlots[2]; size_t internalSize; size_t slack; char * address; if ( allocationList == 0 ) initialize(); lock(); if ( userSize == 0 && !EF_ALLOW_MALLOC_0 && strcmp(filename, "efence.c")) EF_Abort("Allocating 0 bytes, probably a bug at %s:%d.", filename, fileline); /* * If EF_PROTECT_BELOW is set, all addresses returned by malloc() * and company will be page-aligned. 
*/ if ( !EF_PROTECT_BELOW && alignment > 1 ) { if ( (slack = userSize % alignment) != 0 ) userSize += alignment - slack; } /* * The internal size of the buffer is rounded up to the next page-size * boudary, and then we add another page's worth of memory for the * dead page. */ internalSize = userSize + bytesPerPage; if ( (slack = internalSize % bytesPerPage) != 0 ) internalSize += bytesPerPage - slack; /* * These will hold the addresses of two empty Slot structures, that * can be used to hold information for any memory I create, and any * memory that I mark free. */ emptySlots[0] = 0; emptySlots[1] = 0; /* * The internal memory used by the allocator is currently * inaccessable, so that errant programs won't scrawl on the * allocator's arena. I'll un-protect it here so that I can make * a new allocation. I'll re-protect it before I return. */ if ( !noAllocationListProtection ) Page_AllowAccess(allocationList, allocationListSize); /* * If I'm running out of empty slots, create some more before * I don't have enough slots left to make an allocation. */ if ( !internalUse && unUsedSlots < 7 ) { allocateMoreSlots(); } /* * Iterate through all of the slot structures. Attempt to find a slot * containing free memory of the exact right size. Accept a slot with * more memory than we want, if the exact right size is not available. * Find two slot structures that are not in use. We will need one if * we split a buffer into free and allocated parts, and the second if * we have to create new memory and mark it as free. 
* */ slot = allocationList; slot2 = &slot[slotCount - 1]; while (slot <= slot2) { if ( slot->mode == FREE && slot->internalSize >= internalSize ) { if ( !fullSlot ||slot->internalSize < fullSlot->internalSize) { fullSlot = slot; if ( slot->internalSize == internalSize && emptySlots[0] ) break; /* All done, */ } } else if ( slot->mode == NOT_IN_USE ) { if ( !emptySlots[0] ) emptySlots[0] = slot; else if ( !emptySlots[1] ) emptySlots[1] = slot; else if ( fullSlot && fullSlot->internalSize == internalSize ) break; /* All done. */ } if ( slot2->mode == FREE && slot2->internalSize >= internalSize ) { if ( !fullSlot ||slot2->internalSize < fullSlot->internalSize) { fullSlot = slot2; if ( slot2->internalSize == internalSize && emptySlots[0] ) break; /* All done, */ } } else if ( slot2->mode == NOT_IN_USE ) { if ( !emptySlots[0] ) emptySlots[0] = slot2; else if ( !emptySlots[1] ) emptySlots[1] = slot2; else if ( fullSlot && fullSlot->internalSize == internalSize ) break; /* All done. */ } slot++; slot2--; } /* for ( slot = allocationList, count = slotCount ; count > 0; count-- ) { if ( slot->mode == FREE && slot->internalSize >= internalSize ) { if ( !fullSlot ||slot->internalSize < fullSlot->internalSize){ fullSlot = slot; if ( slot->internalSize == internalSize && emptySlots[0] ) break; *//* All done, *//* } } else if ( slot->mode == NOT_IN_USE ) { if ( !emptySlots[0] ) emptySlots[0] = slot; else if ( !emptySlots[1] ) emptySlots[1] = slot; else if ( fullSlot && fullSlot->internalSize == internalSize ) break; *//* All done. *//* } slot++; } */ if ( !emptySlots[0] ) EF_InternalError("No empty slot 0."); if ( !fullSlot ) { /* * I get here if I haven't been able to find a free buffer * with all of the memory I need. I'll have to create more * memory. I'll mark it all as free, and then split it into * free and allocated portions later. 
*/ size_t chunkSize = MEMORY_CREATION_SIZE; if ( !emptySlots[1] ) EF_InternalError("No empty slot 1."); if ( chunkSize < internalSize ) chunkSize = internalSize; if ( (slack = chunkSize % bytesPerPage) != 0 ) chunkSize += bytesPerPage - slack; /* Use up one of the empty slots to make the full slot. */ fullSlot = emptySlots[0]; emptySlots[0] = emptySlots[1]; fullSlot->internalAddress = Page_Create(chunkSize); fullSlot->internalSize = chunkSize; fullSlot->mode = FREE; unUsedSlots--; /* Fill the slot if it was specified to do so. */ if ( EF_FILL != -1 ) memset( (char *)fullSlot->internalAddress ,EF_FILL ,chunkSize); } /* * If I'm allocating memory for the allocator's own data structures, * mark it INTERNAL_USE so that no errant software will be able to * free it. */ if ( internalUse ) fullSlot->mode = INTERNAL_USE; else fullSlot->mode = ALLOCATED; /* * If the buffer I've found is larger than I need, split it into * an allocated buffer with the exact amount of memory I need, and * a free buffer containing the surplus memory. */ if ( fullSlot->internalSize > internalSize ) { emptySlots[0]->internalSize = fullSlot->internalSize - internalSize; emptySlots[0]->internalAddress = ((char *)fullSlot->internalAddress) + internalSize; emptySlots[0]->mode = FREE; fullSlot->internalSize = internalSize; unUsedSlots--; } if ( !EF_PROTECT_BELOW ) { /* * Arrange the buffer so that it is followed by an inaccessable * memory page. A buffer overrun that touches that page will * cause a segmentation fault. */ address = (char *)fullSlot->internalAddress; /* Set up the "live" page. */ if ( internalSize - bytesPerPage > 0 ) Page_AllowAccess( fullSlot->internalAddress ,internalSize - bytesPerPage); address += internalSize - bytesPerPage; /* Set up the "dead" page. */ if ( EF_PROTECT_FREE ) Page_Delete(address, bytesPerPage); else Page_DenyAccess(address, bytesPerPage); /* Figure out what address to give the user. 
*/ address -= userSize; } else { /* EF_PROTECT_BELOW != 0 */ /* * Arrange the buffer so that it is preceded by an inaccessable * memory page. A buffer underrun that touches that page will * cause a segmentation fault. */ address = (char *)fullSlot->internalAddress; /* Set up the "dead" page. */ if ( EF_PROTECT_FREE ) Page_Delete(address, bytesPerPage); else Page_DenyAccess(address, bytesPerPage); address += bytesPerPage; /* Set up the "live" page. */ if ( internalSize - bytesPerPage > 0 ) Page_AllowAccess(address, internalSize - bytesPerPage); } fullSlot->userAddress = address; fullSlot->userSize = userSize; fullSlot->fileline = fileline; dps_strncpy(fullSlot->filename, filename, DPS_FILENAMELEN); /* if (slotCount > 1) DpsSort(allocationList, slotCount, sizeof(Slot), (qsort_cmp)cmp_Slot);*/ /* * Make the pool's internal memory inaccessable, so that the program * being debugged can't stomp on it. */ if ( !internalUse ) Page_DenyAccess(allocationList, allocationListSize); release(); /* if (address == 0x292d3000) { int r = 1 / 0; printf("Error r:%d\n"); }*/ /* fprintf(stderr, " -- allocated: %p @ %s:%d\n", address, filename, fileline); */ return address; }
int main(int argc, char ** argv, char **envp) { const char *env, *bcharset, *lcharset, *conf_dir; char template_name[PATH_MAX+6]=""; char *template_filename = NULL; char *query_string = NULL; char self[1024]=""; char *url = NULL; const char *ResultContentType; int res,httpd=0; size_t catcolumns = 0; int page_size,page_number; DPS_ENV *Env; DPS_AGENT *Agent; DPS_VARLIST query_vars; /* Output Content-type if under HTTPD */ /* Some servers do not pass QUERY_STRING */ /* if the query was empty, so check */ /* REQUEST_METHOD too to be safe */ httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD")); if (!(conf_dir=getenv("DPS_ETC_DIR"))) conf_dir=DPS_CONF_DIR; DpsInit(argc, argv, envp); Env=DpsEnvInit(NULL); if (Env == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Env\n"); exit(0); } DpsVarListInit(&query_vars); Agent = DpsAgentInit(NULL, Env, 0); if (Agent == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Agent\n"); exit(0); } DpsVarListAddEnviron(&Env->Vars,"ENV"); /* Detect self and template name */ if((env = getenv("DPSEARCH_TEMPLATE"))) dps_strncpy(template_name, env, sizeof(template_name) - 1); else if((env = getenv("PATH_INFO")) && env[0]) dps_strncpy(template_name, env + 1, sizeof(template_name) - 1); if((env=getenv("DPSEARCH_SELF"))) dps_strncpy(self,env,sizeof(self)-1); if((env=getenv("QUERY_STRING"))){ query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc query_string\n"); exit(0); } dps_strncpy(query_string, env, dps_strlen(env) + 1); /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. 
Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if((env=getenv("REDIRECT_STATUS"))){ /* Check Apache internal redirect */ /* via "AddHandler" and "Action" */ if(!self[0]){ dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1); } if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } }else{ /* CGI executed without Apache internal redirect */ /* Detect $Self variable with OS independant SLASHES */ if(!self[0]){ dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ char *s,*e; /*This is with OS specific SLASHES */ env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi"); if(strcmp(conf_dir,".")){ /* Take from the config directory */ dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self)); }else{ /* Take from the current directory */ dps_strncpy(template_name,env,sizeof(template_name)-1); } /* Find right slash if it presents */ s=((s=strrchr(template_name,DPSSLASH))?s:template_name); if (*template_filename == '\0') { /* Find .cgi substring */ if ((e = strstr(s, ".cgi")) != NULL) { /* Replace ".cgi" with ".htm" */ e[1]='h';e[2]='t';e[3]='m'; } else { dps_strcat(s, ".htm"); } e = strrchr(s, '/'); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(e + 1); } else { dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 
2); } } } }else{ /* Executed from command line */ /* or under server which does not */ /* pass an empty QUERY_STRING var */ if(argv[1]) { query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q=%s", argv[1]); } else { query_string = (char*)DpsRealloc(query_string, 1024); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q="); } /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } /*// Get template name from command line variable &tmplt */ if(!template_name[0]) dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename); } DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename); DPS_FREE(template_filename); Agent->tmpl.Env_Vars = &Env->Vars; DpsURLNormalizePath(template_name); if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if (strcmp(template_name, "filler.htm")) { /* trying load default 
template */ fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename); if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } else { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } /* set locale if specified */ if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) { setlocale(LC_ALL, url); /*#ifdef HAVE_ASPELL*/ { char *p; if ((p = strchr(url, '.')) != NULL) { *p = '\0'; DpsVarListReplaceStr(&Env->Vars, "g-lc", url); *p = '.'; } } /*#endif*/ url = NULL; } /* Call again to load search Limits if need */ DpsParseQueryString(Agent, &Env->Vars, query_string); Agent->Flags = Env->Flags; Agent->flags |= DPS_FLAG_UNOCON; Env->flags |= DPS_FLAG_UNOCON; DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0)); DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? 
"yes" : "no"), "yes")); DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name); DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR)); DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d", Env->Affixes.naffixes,Env->Spells.nspell, Env->Synonyms.nsynonyms, Env->Acronyms.nacronyms, Env->StopWords.nstopwords); DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords); DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*"); Agent->tmpl.Env_Vars = &Agent->Vars; /* DpsVarListAddEnviron(&Agent->Vars, "ENV");*/ /****************************************************************************************************************************************/ /* This is for query tracking */ DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string); DpsVarListAddStr(&Agent->Vars, "self", self); env = getenv("HTTP_X_FORWARDER_FOR"); if (env) { DpsVarListAddStr(&Agent->Vars, "IP", env); } else { env = getenv("REMOTE_ADDR"); DpsVarListAddStr(&Agent->Vars, "IP", env ? 
env : "localhost"); } bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1"); Env->bcs=DpsGetCharSet(bcharset); lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1"); Env->lcs=DpsGetCharSet(lcharset); ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html"); if(httpd){ if(!Env->bcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); }else if(!Env->lcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); }else{ printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset); } }else{ if(!Env->bcs){ printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); } if(!Env->lcs){ printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); } } /* These parameters taken from "variable section of template"*/ res = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS); page_size = dps_min(res, MAX_PS); page_number = DpsVarListFindInt(&Agent->Vars, "p", 0); if (page_number == 0) { page_number = DpsVarListFindInt(&Agent->Vars, "np", 0); DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1); } else page_number--; res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size; DpsVarListAddInt(&Agent->Vars, "pn", res); catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", "")); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom"); 
DpsVarListFree(&query_vars); DpsAgentFree(Agent); DpsEnvFree(Env); DPS_FREE(query_string); DPS_FREE(url); if (httpd) fflush(NULL); else fclose(stdout); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return DPS_OK; }
/*
 * Load a synonyms file into Env->Synonyms.
 *
 * The file is read in one piece and processed line by line:
 *   - lines that are empty or start with '#', space, tab, CR or LF are skipped;
 *   - "Charset: <name>"    selects the charset used to convert the words;
 *   - "Language: <name>"   records the language (at most 63 bytes are kept);
 *   - "Thesaurus: yes"     turns on per-line group keys, any other value turns
 *                          them off;
 *   - any other line is split with DpsGetArgs(); when it yields at least two
 *     words, every pair of words is stored twice (direct and reverse order).
 *
 * Returns DPS_OK on success and DPS_ERROR on failure.  Every failure after
 * the file has been opened goes through one cleanup path that releases the
 * file buffer, the word array, the converted words and the descriptor.
 * An error text is left in Env->errstr except for allocation failures
 * inside the per-line processing, which set none.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
  struct stat sb;
  char *str, *data = NULL, *cur_n = NULL;
  char lang[64]="";
  DPS_CHARSET *cs=NULL;
  DPS_CHARSET *sys_int=DpsGetCharSet("sys-int");
  DPS_CONV file_uni;
  DPS_WIDEWORD *ww = NULL;
  size_t nww = 0;          /* number of leading ww[] entries whose uword must be freed */
  size_t w;
  size_t key = 1;
  int flag_th = 0;
  int fd;
  int rc = DPS_ERROR;
  char savebyte = '\0';

  if (stat(filename, &sb)) {
    int err = errno;       /* fprintf() may change errno before it is formatted again */
    fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(err));
    dps_snprintf(Env->errstr, sizeof(Env->errstr)-1,
                 "Unable to stat synonyms file '%s': %s", filename, strerror(err));
    return DPS_ERROR;
  }
  if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,
                 "Unable to open synonyms file '%s': %s", filename, strerror(errno));
    return DPS_ERROR;
  }
  if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", (int)sb.st_size);
    goto cleanup;
  }
  if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,
                 "Unable to read synonym file '%s': %s", filename, strerror(errno));
    goto cleanup;
  }
  data[sb.st_size] = '\0';

  /*
   * Lines are isolated in place: the byte following each '\n' is saved in
   * savebyte and replaced by NUL, and put back when the next line starts.
   */
  str = data;
  cur_n = strchr(str, '\n');
  if (cur_n != NULL) {
    cur_n++;
    savebyte = *cur_n;
    *cur_n = '\0';
  }

  while (str != NULL) {
    if (str[0]=='\0'||str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') {
      /* Nothing to do.  The '\0' case is the empty piece that follows a */
      /* trailing newline; it must not be taken for a synonym line.      */
    }else if(!strncasecmp(str,"Charset:",8)){
      char * lasttok;
      char * charset;
      if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
        cs=DpsGetCharSet(charset);
        if(!cs){
          dps_snprintf(Env->errstr, sizeof(Env->errstr),
                       "Unknown charset '%s' in synonyms file '%s'", charset, filename);
          goto cleanup;
        }
        DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
      }
    }else if(!strncasecmp(str,"Language:",9)){
      char * lasttok;
      char * l;
      if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
        dps_strncpy(lang, l, sizeof(lang)-1);
      }
    }else if(!strncasecmp(str, "Thesaurus:", 10)) {
      char * lasttok;
      char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
      /* A "Thesaurus:" line without a value yields no token; treat it as "no". */
      flag_th = (tok != NULL && strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
    }else{
      char *av[255];
      size_t ac, i, j;
      dpsunicode_t *t;

      if(!cs){
        dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,
                     "No Charset command in synonyms file '%s'",filename);
        goto cleanup;
      }
      if(!lang[0]){
        dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,
                     "No Language command in synonyms file '%s'",filename);
        goto cleanup;
      }

      ac = DpsGetArgs(str, av, 255);
      if (ac >= 2) {
        if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) goto cleanup;

        /* Convert every word: lower-cased, then NFC-normalised. */
        for (i = 0; i < ac; i++) {
          ww[i].word = av[i];
          ww[i].len = dps_strlen(av[i]);
          ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
          if (ww[i].uword == NULL) goto cleanup;
          DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1),
                  av[i], ww[i].len + 1);
          DpsUniStrToLower(ww[i].uword);
          ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
          DPS_FREE(t);
          nww = i + 1;
        }

        for (i = 0; i < ac - 1; i++) {
          for (j = i + 1; j < ac; j++) {
            DPS_SYNONYM *syn;

            /* Two entries are written per pair, so keep room for both. */
            if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
              Env->Synonyms.msynonyms += 64;
              Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym,
                                         sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
              if (Env->Synonyms.Synonym == NULL) {
                Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
                goto cleanup;
              }
            }

            /* Add direct order */
            syn = &Env->Synonyms.Synonym[Env->Synonyms.nsynonyms];
            bzero((void*)syn, sizeof(DPS_SYNONYM));
            syn->p.uword = DpsUniDup(ww[i].uword);
            syn->s.uword = DpsUniDup(ww[j].uword);
            syn->p.count = syn->s.count = (size_t)((flag_th) ? key : 0);
            Env->Synonyms.nsynonyms++;

            /* Add reverse order */
            syn = &Env->Synonyms.Synonym[Env->Synonyms.nsynonyms];
            bzero((void*)syn, sizeof(DPS_SYNONYM));
            syn->p.uword = DpsUniDup(ww[j].uword);
            syn->s.uword = DpsUniDup(ww[i].uword);
            syn->p.count = syn->s.count = (size_t)((flag_th) ? key : 0);
            Env->Synonyms.nsynonyms++;
          }
        }

        for (i = 0; i < ac; i++) {
          DPS_FREE(ww[i].uword);
        }
        nww = 0;

        /* Next group key; zero is skipped because it marks "no thesaurus group". */
        do { key++; } while (key == 0);
      }
    }

    /* Step to the next line, restoring the byte that was overwritten with NUL. */
    str = cur_n;
    if (str != NULL) {
      *str = savebyte;
      cur_n = strchr(str, '\n');
      if (cur_n != NULL) {
        cur_n++;
        savebyte = *cur_n;
        *cur_n = '\0';
      }
    }
  }

  rc = DPS_OK;

cleanup:
  /* nww is non-zero only when a line was abandoned half-way through. */
  for (w = 0; w < nww; w++) {
    DPS_FREE(ww[w].uword);
  }
  DPS_FREE(ww);
  DPS_FREE(data);
  DpsClose(fd);
  return rc;
}