/*
 * Append one word/frequency pair to a Chinese word list.
 *
 * The word pointer of `chinaword` is stored as is (it is not duplicated here).
 * The word array grows in steps of 1024 entries and the 65536-slot table
 * `List->hash` is created on first use.  That table is indexed by the low
 * 16 bits of a word's first character and keeps the length of the longest
 * word seen for that character.  `List->total` accumulates the frequencies.
 *
 * If either allocation fails the list counters are reset to zero and the
 * word is not added.
 */
static void DpsChineseListAddBundle(DPS_CHINALIST *List, DPS_CHINAWORD * chinaword){
  DPS_CHINAWORD *slot;
  unsigned int bucket;
  size_t wlen;

  /* Make room for one more entry. */
  if (List->nwords + 1 > List->mwords) {
    List->mwords += 1024;
    List->ChiWord = (DPS_CHINAWORD *)DpsRealloc(List->ChiWord, (List->mwords)*sizeof(DPS_CHINAWORD));
    if (List->ChiWord == NULL) {
      List->mwords = List->nwords = 0;
      return;
    }
  }

  /* Lazily create the "longest word per first character" table. */
  if (List->hash == NULL) {
    List->hash = (size_t *)DpsXmalloc(65536 * sizeof(size_t));
    if (List->hash == NULL) {
      List->mwords = List->nwords = 0;
      return;
    }
  }

  slot = &List->ChiWord[List->nwords];
  slot->word = chinaword->word;
  slot->freq = chinaword->freq;
  List->total += chinaword->freq;

  bucket = (unsigned int)(slot->word[0] & 0xffff);
  wlen = DpsUniLen(slot->word);
  if (List->hash[bucket] < wlen) {
    List->hash[bucket] = wlen;
  }

  List->nwords++;
}
/*
 * Push a copy of *arg onto the argument stack of `s`.
 *
 * The item is stored first; the stack is then enlarged by DPS_MAXSTACK
 * entries as soon as the fill count reaches the capacity, so there is
 * always a free slot for the next push.
 *
 * Returns DPS_OK, or DPS_ERROR when the enlargement fails.
 */
static int PUSHARG(DPS_BOOLSTACK * s, DPS_STACK_ITEM *arg) {
  s->astack[s->nastack++] = *arg;

  if (s->nastack < s->mastack) return DPS_OK;

  s->mastack += DPS_MAXSTACK;
  s->astack = (DPS_STACK_ITEM*)DpsRealloc(s->astack, s->mastack * sizeof(DPS_STACK_ITEM));
  return (s->astack == NULL) ? DPS_ERROR : DPS_OK;
}
/*
 * Push the command code `arg` onto the command stack of `s`.
 *
 * The value is stored first; the stack is then enlarged by DPS_MAXSTACK
 * entries as soon as the fill count reaches the capacity, so there is
 * always a free slot for the next push.
 *
 * Returns DPS_OK, or DPS_ERROR when the enlargement fails.
 */
static int PUSHCMD(DPS_BOOLSTACK * s, int arg) {
  s->cstack[s->ncstack++] = arg;

  if (s->ncstack < s->mcstack) return DPS_OK;

  s->mcstack += DPS_MAXSTACK;
  s->cstack = (int*)DpsRealloc(s->cstack, s->mcstack * sizeof(int));
  return (s->cstack == NULL) ? DPS_ERROR : DPS_OK;
}
/*
 * Sort a synonym list and rebuild its secondary index.
 *
 * List->Synonym is sorted in place with cmpsyn.  List->Back is then
 * (re)allocated as an array of pointers to the sorted entries and sorted
 * with cmpsynback.  Nothing is done for a list without a Synonym array; if
 * the Back array cannot be allocated it is left NULL.
 */
__C_LINK void __DPSCALL DpsSynonymListSort(DPS_SYNONYMLIST * List){
  size_t idx;

  if (List->Synonym == NULL) return;

  if (List->nsynonyms > 1)
    DpsSort(List->Synonym, List->nsynonyms, sizeof(DPS_SYNONYM), &cmpsyn);

  List->Back = (DPS_SYNONYM**)DpsRealloc(List->Back, (List->nsynonyms + 1) * sizeof(DPS_SYNONYM*));
  if (List->Back == NULL) return;

  for (idx = 0; idx < List->nsynonyms; idx++) {
    List->Back[idx] = &List->Synonym[idx];
  }

  if (List->nsynonyms > 1)
    DpsSort(List->Back, List->nsynonyms, sizeof(DPS_SYNONYM*), &cmpsynback);
}
/*
 * Append a copy of *item to the text list.
 *
 * The str, href and section_name strings are duplicated with DpsStrdup
 * (href and section_name only when non-NULL); section, strict and len are
 * copied and `marked` is cleared.  Items without a `str` are ignored.
 *
 * The item array grows by DPS_TEXTLIST_PAS entries at a time.  If growing
 * fails, the list counters are reset to zero and nothing is added.
 *
 * With WITH_PARANOIA every exit path, including the out-of-memory one, now
 * calls DpsViolationExit to match the DpsViolationEnter at the top.
 */
__C_LINK void __DPSCALL DpsTextListAdd(DPS_TEXTLIST * tlist,const DPS_TEXTITEM *item){
  DPS_TEXTITEM *dst;
#ifdef WITH_PARANOIA
  void * paran = DpsViolationEnter(paran);
#endif

  if(!item->str) {
#ifdef WITH_PARANOIA
    DpsViolationExit(-1, paran);
#endif
    return;
  }

#ifdef DEBUG_MEM
  /* Debug builds keep the array read-only between calls; unlock it. */
  if (tlist->mitems) {
    mprotect(tlist->Items, tlist->nitems * sizeof(DPS_TEXTITEM), PROT_READ | PROT_WRITE);
  }
#endif

  if (tlist->nitems + 1 > tlist->mitems) {
    tlist->mitems += DPS_TEXTLIST_PAS;
    tlist->Items = (DPS_TEXTITEM*)DpsRealloc(tlist->Items, (tlist->mitems) * sizeof(DPS_TEXTITEM) + 4096);
    if (tlist->Items == NULL) {
      tlist->nitems = tlist->mitems = 0;
#ifdef WITH_PARANOIA
      DpsViolationExit(-1, paran);
#endif
      return;
    }
  }

  dst = &tlist->Items[tlist->nitems];
  dst->str = (char*)DpsStrdup(item->str);
  dst->href = (item->href != NULL) ? (char*)DpsStrdup(item->href) : NULL;
  dst->section_name = (item->section_name != NULL) ? (char*)DpsStrdup(item->section_name) : NULL;
  dst->section = item->section;
  dst->strict = item->strict;
  dst->len = item->len;
  dst->marked = 0;
  tlist->nitems++;

#ifdef DEBUG_MEM
  if (tlist->mitems) {
    mprotect(tlist->Items, tlist->nitems * sizeof(DPS_TEXTITEM), PROT_READ);
  }
#endif
#ifdef WITH_PARANOIA
  DpsViolationExit(-1, paran);
#endif
  return;
}
/*
 * Append a URL to the indexer's shared target list unless an entry with the
 * same URL (case-insensitive) and the same Accept-Language is already there.
 *
 * Indexer - agent whose Conf->Targets list is extended
 * url     - target URL
 * lang    - Accept-Language value; an empty string adds no such header
 * hops    - stored in the "Hops" section
 * parent  - stored in the "Referrer-ID" section
 *
 * The list is scanned from the newest entry back to index 0.  The previous
 * loop (`i = num_rows - 1; i > 0; i--`) stopped before index 0, so a
 * duplicate of the first target was never detected.
 *
 * The THREAD and CONF locks are taken on entry; CONF is released before the
 * final DPS_URL_ACTION_ADD call and THREAD after it.
 */
void DpsAppendTarget(DPS_AGENT *Indexer, const char *url, const char *lang, const int hops, int parent) {
  DPS_DOCUMENT *Doc, *Save;
  size_t i;

  TRACE_IN(Indexer, "AppendTarget");

  DPS_GETLOCK(Indexer, DPS_LOCK_THREAD);
  DPS_GETLOCK(Indexer, DPS_LOCK_CONF);

  /* Counting down with `i-- > 0` visits every index including 0 and is
     safe for an unsigned counter and for an empty list. */
  for (i = Indexer->Conf->Targets.num_rows; i-- > 0; ) {
    Doc = &Indexer->Conf->Targets.Doc[i];
    if ((strcasecmp(DpsVarListFindStr(&Doc->Sections, "URL", ""), url) == 0)
        && (strcmp(DpsVarListFindStr(&Doc->RequestHeaders, "Accept-Language", ""), lang) == 0)) {
      DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
      DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
      TRACE_OUT(Indexer);
      return;
    }
  }

  if ((Indexer->Conf->Targets.Doc =
       DpsRealloc(Save = Indexer->Conf->Targets.Doc, (Indexer->Conf->Targets.num_rows + 1) * sizeof(DPS_DOCUMENT))) == NULL) {
    Indexer->Conf->Targets.Doc = Save;
    DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
    DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
    TRACE_OUT(Indexer);
    return;
  }

  Doc = &Indexer->Conf->Targets.Doc[Indexer->Conf->Targets.num_rows];
  DpsDocInit(Doc);
  DpsVarListAddStr(&Doc->Sections, "URL", url);
  DpsVarListAddInt(&Doc->Sections, "Hops", hops);
  DpsVarListDel(&Doc->Sections, "URL_ID");
  DpsVarListReplaceInt(&Doc->Sections, "Referrer-ID", parent);
  if (*lang != '\0') DpsVarListAddStr(&Doc->RequestHeaders, "Accept-Language", lang);

  /* The new slot only becomes part of the list when the lookup yields a
     non-zero DP_ID. */
  if (DPS_OK == DpsURLAction(Indexer, Doc, DPS_URL_ACTION_FINDBYURL)) {
    urlid_t url_id = DpsVarListFindInt(&Doc->Sections, "DP_ID", 0);
    if (url_id != 0) Indexer->Conf->Targets.num_rows++;
    else DpsDocFree(Doc);
  }

  DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
  /* NOTE(review): Doc may already have gone through DpsDocFree above, and
     it points into the shared array after the CONF lock is released;
     confirm that DPS_URL_ACTION_ADD is safe in both situations. */
  DpsURLAction(Indexer, Doc, DPS_URL_ACTION_ADD);
  DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
  TRACE_OUT(Indexer);
  return;
}
/*
 * Split `pattern` into tokens with DpsUniRegTok and store a zero-terminated
 * copy of each token in reg->Token; reg->ntokens counts them.
 *
 * Returns DPS_OK on success.  Returns DPS_ERROR when memory runs out: if
 * the token array cannot grow, ntokens is reset to 0; if a token copy
 * cannot be allocated, the tokens stored so far stay in `reg` (ntokens
 * still describes them) so the caller can release them as usual.
 *
 * Fixes: the tokenizer state argument is `&lt` (the source text had it
 * mangled), and the result of DpsMalloc is checked before it is written to.
 */
static int DpsUniRegComp(DPS_UNIREG_EXP *reg, const dpsunicode_t *pattern) {
  const dpsunicode_t *tok, *lt;

  reg->ntokens = 0;
  reg->Token = NULL;

  for (tok = DpsUniRegTok(pattern, &lt); tok != NULL; tok = DpsUniRegTok(NULL, &lt)) {
    size_t len = (size_t)(lt - tok);   /* lt marks the end of the current token */
    dpsunicode_t *copy;

    reg->Token = (DPS_UNIREG_TOK*)DpsRealloc(reg->Token, sizeof(*reg->Token) * (reg->ntokens + 1));
    if (reg->Token == NULL) {
      reg->ntokens = 0;
      return DPS_ERROR;
    }

    copy = (dpsunicode_t*)DpsMalloc((len + 1) * sizeof(dpsunicode_t));
    if (copy == NULL) {
      return DPS_ERROR;
    }
    dps_memmove(copy, tok, len * sizeof(dpsunicode_t));
    copy[len] = 0;

    reg->Token[reg->ntokens].str = copy;
    reg->ntokens++;
  }
  return DPS_OK;
}
/*
 * Segment `line` into space-separated words using the frequency dictionary
 * in `List`.
 *
 * The line is copied and split with DpsUniGetSepToken.  A token is copied
 * to the output unchanged when DpsUniCType of its first character is 1, 2
 * or greater than DPS_UNI_BUKVA; every other token is run through
 * DpsSegmentProcess.  Pieces are joined with a single space.
 *
 * Returns a newly allocated string the caller must free, or NULL when
 * memory runs out or DpsSegmentProcess fails.
 *
 * Fixes: the output buffer and the segmented piece are released on every
 * failure path (both used to leak); the length of `line` is computed once
 * instead of on each loop iteration; the working copy is explicitly
 * zero-terminated.
 */
dpsunicode_t *DpsSegmentByFreq(DPS_CHINALIST *List, dpsunicode_t *line) {
  dpsunicode_t *out, *mid, *last, *sentence, *segmented_sentence, *piece, part;
  size_t linelen, i, j, l, a;
  int ctype, have_bukva_forte, fb_type;
  dpsunicode_t space[] = { 32, 0 };

  linelen = DpsUniLen(line);
  l = 2 * (linelen + 1);
  if (l < 2) return NULL;   /* only possible if the size computation wrapped */

  out = (dpsunicode_t*)DpsMalloc(l * sizeof(dpsunicode_t));
  if (out == NULL) return NULL;
  *out = '\0';

  mid = (dpsunicode_t*)DpsXmalloc(l * sizeof(dpsunicode_t));
  if (mid == NULL) {
    DPS_FREE(out);
    return NULL;
  }

  /* Work on a private copy: the tokenizer loop below writes terminators
     into the buffer it scans. */
  for (i = 0; i < linelen; i++) {
    mid[i] = line[i];
  }
  mid[linelen] = 0;

  for (sentence = DpsUniGetSepToken(mid, &last, &ctype, &have_bukva_forte, 0);
       sentence;
       sentence = DpsUniGetSepToken(NULL, &last, &ctype, &have_bukva_forte, 0)) {

    /* Temporarily terminate the token; the character is restored below. */
    part = *last;
    *last = 0;

    fb_type = DpsUniCType(*sentence);
    segmented_sentence = NULL;
    if (fb_type > DPS_UNI_BUKVA || fb_type == 2 || fb_type == 1) {
      piece = sentence;
    } else {
      segmented_sentence = DpsSegmentProcess(List, sentence);
      if (segmented_sentence == NULL) {
        DPS_FREE(out);
        DPS_FREE(mid);
        return NULL;
      }
      piece = segmented_sentence;
    }

    /* Grow the output when the piece (with generous slack) may not fit. */
    a = 2 * (DpsUniLen(piece) + 1);
    j = DpsUniLen(out);
    if (j + a >= l) {
      l = j + a + 1;
      out = (dpsunicode_t*)DpsRealloc(out, l * sizeof(dpsunicode_t));
      if (out == NULL) {
        DPS_FREE(segmented_sentence);
        DPS_FREE(mid);
        return NULL;
      }
    }
    if (*out) DpsUniStrCat(out, space);
    DpsUniStrCat(out, piece);
    DPS_FREE(segmented_sentence);

    *last = part;
  }

  DPS_FREE(mid);
  return out;
}
/* Release the bookkeeping arrays of DpsSegmentProcess together with the
   first `nresults` candidate strings held in `result`. */
static void DpsSegmentPathsFree(int *position, int *next, double *value, dpsunicode_t **result, int nresults) {
  int k;

  if (result != NULL) {
    for (k = 0; k < nresults; k++) {
      DPS_FREE(result[k]);
    }
    DPS_FREE(result);
  }
  DPS_FREE(position);
  DPS_FREE(next);
  DPS_FREE(value);
}

/*
 * Segment one run of text into dictionary words.
 *
 * Candidate segmentations ("paths") are kept in parallel arrays:
 *   position[k] - how many characters of `line` path k has consumed
 *   value[k]    - score of path k
 *   result[k]   - text of path k, words separated by a space
 *   next[k]     - index of the following path in a singly linked list whose
 *                 head is `top` (-1 terminates the list)
 *
 * Each round takes the first path that has not yet consumed the whole line
 * out of the list and extends it by every dictionary word starting at its
 * position, trying the longest length recorded in List->hash first.  A
 * single character that is not in the dictionary is added to it with
 * frequency 1 so progress is always possible.  The loop stops when the
 * head path is complete and is the only one left.
 *
 * Returns a newly allocated string (every word preceded by a space) the
 * caller must free; a copy of `line` when the list has no hash table; NULL
 * when memory runs out or no path survives.
 *
 * Fixes relative to the previous version:
 *   - the "no open path" case is handled before the unlink step, which
 *     used to read next[-1];
 *   - an empty path list no longer leads to reading result[-1];
 *   - all error paths free the candidate strings and the current word copy;
 *   - the results of DpsUniDup/DpsUniNDup are checked.
 */
static dpsunicode_t *DpsSegmentProcess(DPS_CHINALIST *List, dpsunicode_t *line) {
  int top, nextid, *position, *next, len, maxid, i, current, father, needinsert, iindex;
  unsigned int h;
  size_t need;
  double *value, p;
  dpsunicode_t **result;
  dpsunicode_t *otv, space[] = {32, 0};
  DPS_CHINAWORD *chinaword, chiw;

  if (List->hash == NULL) {
    return (dpsunicode_t*)DpsUniDup(line);
  }

  len = DpsUniLen(line);
  maxid = 2 * len + 1;

  position = (int*)DpsMalloc(maxid * sizeof(int));
  if (position == NULL) return NULL;
  next = (int*)DpsMalloc(maxid * sizeof(int));
  if (next == NULL) {
    DPS_FREE(position);
    return NULL;
  }
  value = (double*)DpsMalloc(maxid * sizeof(double));
  if (value == NULL) {
    DPS_FREE(position);
    DPS_FREE(next);
    return NULL;
  }
  result = (dpsunicode_t **)DpsMalloc(maxid * sizeof(dpsunicode_t *));
  if (result == NULL) {
    DPS_FREE(position);
    DPS_FREE(next);
    DPS_FREE(value);
    return NULL;
  }

  /* Seed the list with a single empty path. */
  top = 0;
  value[0] = 1.0 * List->total * len;
  position[0] = 0;
  next[0] = -1;
  result[0] = (dpsunicode_t*)DpsUniDup(&space[1]);
  if (result[0] == NULL) {
    DpsSegmentPathsFree(position, next, value, result, 0);
    return NULL;
  }
  nextid = 1;

  while ((top != -1) && (!((position[top] >= len) && (next[top] == -1)))) {

    /* Find the first open path. */
    current = top;
    father = top;
    while ((current != -1) && (position[current] >= len)) {
      father = current;
      current = next[current];
    }

    if (current == -1) {
      /* No open path is left: keep only the head, which ends the loop. */
      next[top] = -1;
      continue;
    }

    /* Unlink the open path; its extensions are inserted below. */
    if (current == top) {
      top = next[top];
    } else {
      next[father] = next[current];
    }

    /* Longest candidate length for this first character, clipped to the
       remaining text; unknown characters are tried as single characters. */
    h = (unsigned int)(line[position[current]] & 0xffff);
    if (List->hash[h] == 0) {
      List->hash[h] = 1;
    }
    i = List->hash[h];
    if (i + position[current] > len) {
      i = len - position[current];
    }

    otv = NULL;
    for (; i > 0; i--) {
      DPS_FREE(otv);
      otv = DpsUniNDup(&line[position[current]], (size_t)i);
      if (otv == NULL) {
        DpsSegmentPathsFree(position, next, value, result, nextid);
        return NULL;
      }
      chinaword = DpsChineseListFind(List, otv);

      if (i == 1 && chinaword == NULL) {
        /* Unknown single character: add it with frequency 1. */
        DPS_FREE(otv);
        otv = DpsUniNDup(&line[position[current]], 1);
        if (otv == NULL) {
          DpsSegmentPathsFree(position, next, value, result, nextid);
          return NULL;
        }
        chiw.word = otv;
        chiw.freq = 1;
        DpsChineseListAdd(List, chinaword = &chiw);
      }

      if ((chinaword == NULL) || !chinaword->freq) continue;

      /* Build the extended path in slot nextid. */
      p = (double)chinaword->freq / List->total;
      value[nextid] = value[current] / (-1.0 * log(p) / log(10.0));
      position[nextid] = position[current] + i;
      need = DpsUniLen(result[current]) + DpsUniLen(otv) + 2;
      result[nextid] = (dpsunicode_t*)DpsXmalloc(need * sizeof(dpsunicode_t));
      if (result[nextid] == NULL) {
        DPS_FREE(otv);
        DpsSegmentPathsFree(position, next, value, result, nextid);
        return NULL;
      }
      DpsUniStrCpy(result[nextid], result[current]);
      DpsUniStrCat(result[nextid], space);
      DpsUniStrCat(result[nextid], otv);

      /* Look for a listed path that ends at the same position.  The new
         path is dropped when 0.85 * the listed value is at least its own
         value; it replaces the listed path when that path is the head. */
      needinsert = 1;
      iindex = top;
      father = top;
      while (iindex != -1) {
        if (position[iindex] == position[nextid]) {
          if (0.85 * value[iindex] >= value[nextid]) {
            needinsert = 0;
          } else if (top == iindex) {
            next[nextid] = next[iindex];
            top = nextid;
            needinsert = 0;
          }
          iindex = -1;
        } else {
          father = iindex;
          iindex = next[iindex];
        }
      }

      /* Link the new path into the list.  iindex is always -1 at this
         point, so the path is linked after `father`, or becomes the head
         of an empty list. */
      if (needinsert == 1) {
        while ((iindex != -1) && (value[iindex] > value[nextid])) {
          father = iindex;
          iindex = next[iindex];
        }
        if (top == iindex) {
          next[nextid] = top;
          top = nextid;
        } else {
          next[father] = nextid;
          next[nextid] = iindex;
        }
      }

      nextid++;
      if (nextid >= maxid) {
        maxid += 128;
        position = (int*)DpsRealloc(position, maxid * sizeof(int));
        next = (int*)DpsRealloc(next, maxid * sizeof(int));
        value = (double*)DpsRealloc(value, maxid * sizeof(double));
        result = (dpsunicode_t **)DpsRealloc(result, maxid * sizeof(dpsunicode_t *));
        if (position == NULL || next == NULL || value == NULL || result == NULL) {
          DPS_FREE(otv);
          DpsSegmentPathsFree(position, next, value, result, nextid);
          return NULL;
        }
      }
    }
    DPS_FREE(otv);
  }

  DPS_FREE(position);
  DPS_FREE(next);
  DPS_FREE(value);

  /* Keep the head path, release every other candidate. */
  otv = NULL;
  for (i = 0; i < nextid; i++) {
    if (i == top) otv = result[i];
    else DPS_FREE(result[i]);
  }
  DPS_FREE(result);
  return otv;
}
/*
 * Move every record of a base from its old file layout to the new one.
 *
 * base_type selects the base: 0 - stored documents, 1 - URL data,
 * 2 - word tree; any other value does nothing and returns DPS_OK.
 * The old number of files is read from the "Old...Files" variable and the
 * new one from the matching "...Files" variable.
 *
 * For each old file the non-zero record ids are collected from the index
 * file, then every record is read, deleted from the old base and written
 * to the new one.  Old files whose number is not below the new file count
 * are unlinked at the end.
 *
 * Returns DPS_OK on success or when interrupted by a signal, DPS_ERROR on
 * a seek or allocation failure.
 *
 * Fixes: the id buffer allocation is checked (it was used unchecked), and
 * size_t values are cast to match the %d / %x log conversions.
 */
extern __C_LINK int __DPSCALL DpsBaseRelocate(DPS_AGENT *Agent, int base_type) {
  DPS_BASE_PARAM O, N;
  DPS_BASE_PARAM *Old = &O, *New = &N;
  size_t base, i, ndel, mdel = 128, data_len;
  urlid_t *todel;
  void *data;

  bzero(Old, sizeof(O));
  bzero(New, sizeof(N));

  switch(base_type) {
  case 0: /* stored */
    Old->subdir = "store";
    Old->basename = "doc";
    Old->indname = "doc";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldStoredFiles", 0x100);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
    New->subdir = "store";
    New->basename = "doc";
    New->indname = "doc";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "StoredFiles", 0x100);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
    DpsLog(Agent, DPS_LOG_INFO, "Relocating stored database");
    break;
  case 1: /* URL data */
    Old->subdir = DPS_URLDIR;
    Old->basename = "info";
    Old->indname = "info";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldURLDataFiles", 0x300);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
#ifdef HAVE_ZLIB
    O.zlib_method = Z_DEFLATED;
    O.zlib_level = 9;
    O.zlib_windowBits = DPS_BASE_INFO_WINDOWBITS;
    O.zlib_memLevel = 9;
    O.zlib_strategy = DPS_BASE_INFO_STRATEGY;
#endif
    New->subdir = DPS_URLDIR;
    New->basename = "info";
    New->indname = "info";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "URLDataFiles", 0x300);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
#ifdef HAVE_ZLIB
    N.zlib_method = Z_DEFLATED;
    N.zlib_level = 9;
    N.zlib_windowBits = DPS_BASE_INFO_WINDOWBITS;
    N.zlib_memLevel = 9;
    N.zlib_strategy = DPS_BASE_INFO_STRATEGY;
#endif
    DpsLog(Agent, DPS_LOG_INFO, "Relocating URLData database");
    break;
  case 2: /* tree wrd */
    Old->subdir = DPS_TREEDIR;
    Old->basename = "wrd";
    Old->indname = "wrd";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldWrdFiles", 0x300);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
#ifdef HAVE_ZLIB
    O.zlib_method = Z_DEFLATED;
    O.zlib_level = 9;
    O.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
    O.zlib_memLevel = 9;
    O.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif
    New->subdir = DPS_TREEDIR;
    New->basename = "wrd";
    New->indname = "wrd";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "WrdFiles", 0x300);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
#ifdef HAVE_ZLIB
    N.zlib_method = Z_DEFLATED;
    N.zlib_level = 9;
    N.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
    N.zlib_memLevel = 9;
    N.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif
    DpsLog(Agent, DPS_LOG_INFO, "Relocating Wrd database");
    break;
  default:
    return DPS_OK;
  }

  /* Buffer for the record ids found in one old file; grown on demand. */
  todel = (urlid_t*)DpsMalloc(mdel * sizeof(urlid_t));
  if (todel == NULL) {
    DpsLog(Agent, DPS_LOG_ERROR, "Can't alloc %d bytes %s:%d", (int)(mdel * sizeof(urlid_t)), __FILE__, __LINE__);
    return DPS_ERROR;
  }

  for (base = 0; base < O.NFiles; base++) {
    ndel = 0;

    if (have_sigterm || have_sigint || have_sigalrm) {
      DpsLog(Agent, DPS_LOG_EXTRA, "%s signal received. Exiting chackup", (have_sigterm) ? "SIGTERM" :
             (have_sigint) ? "SIGINT" : "SIGALRM");
      DpsBaseClose(Old);
      DpsBaseClose(New);
      DPS_FREE(todel);
      return DPS_OK;
    }

    Old->rec_id = (urlid_t)(base << DPS_BASE_BITS);
    if (DpsBaseOpen(Old, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(Old);
      DpsBaseClose(New);
      continue;
    }
    if (lseek(O.Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(Agent, DPS_LOG_ERROR, "Can't seeek for file %s", Old->Ifilename);
      DpsBaseClose(Old);
      DpsBaseClose(New);
      DPS_FREE(todel);
      return DPS_ERROR;
    }

    /* Pass 1: collect the ids of all used slots of this index file. */
    while (read(Old->Ifd, &Old->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      if (Old->Item.rec_id != 0) {
        if (ndel >= mdel) {
          mdel += 128;
          todel = (urlid_t*)DpsRealloc(todel, mdel * sizeof(urlid_t));
          if (todel == NULL) {
            DpsBaseClose(Old);
            DpsBaseClose(New);
            DpsLog(Agent, DPS_LOG_ERROR, "Can't realloc %d bytes %s:%d", (int)(mdel * sizeof(urlid_t)), __FILE__, __LINE__);
            return DPS_ERROR;
          }
        }
        todel[ndel++] = Old->Item.rec_id;
      }
    }
    DpsBaseClose(Old);

    /* Pass 2: move each record from the old layout to the new one. */
    for (i = 0; i < ndel; i++) {
      Old->rec_id = todel[i];
      data = DpsBaseARead(Old, &data_len);
      if (data == NULL) continue;
      DpsBaseDelete(Old);
      DpsBaseClose(Old);
      New->rec_id = todel[i];
      DpsBaseWrite(New, data, data_len);
      DpsBaseClose(New);
      DPS_FREE(data);
    }
    DpsLog(Agent, DPS_LOG_EXTRA, "\tbase: %d [0x%x], %d records relocated", (int)base, (unsigned int)base, (int)ndel);
  }
  DPS_FREE(todel);

  /* Remove old files that have no counterpart in the new layout. */
  for (base = N.NFiles; base < O.NFiles; base++) {
    Old->rec_id = (urlid_t)(base << DPS_BASE_BITS);
    if (DpsBaseOpen(Old, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(Old);
      continue;
    }
    unlink(O.Ifilename);
    unlink(O.Sfilename);
    DpsBaseClose(Old);
  }

  return DPS_OK;
}
/*
 * Build the absolute form of the link `newURL` found in document `curURL`
 * and store it, newly allocated, in *str.
 *
 * Parts missing from newURL (schema, host, auth, path, file name) are taken
 * from curURL.  "mailto", "javascript" and "feed" links are rendered as
 * schema:specific, "htdb" links as schema:path, everything else as
 * schema://[auth@]host/path.  For ftp:// links the part from ";type"
 * onwards is cut off when ";type=" is present.
 *
 * When ReverseAliasFlag is set, the result is passed through the
 * "ReverseAliasProg" program (if configured) and then through the
 * ReverseAliases match list, at most 1024 times.
 *
 * If the *str allocation fails, *str is NULL on return.
 *
 * Fixes: `pathfile` is released when the *str allocation fails (it used to
 * leak); a failed DpsStrdup of an alias result stops processing instead of
 * feeding NULL to the next match; size_t is cast for the %d log conversion.
 */
void RelLink(DPS_AGENT *Indexer, DPS_URL *curURL, DPS_URL *newURL, char **str, int ReverseAliasFlag) {
  const char *schema = newURL->schema ? newURL->schema : curURL->schema;
  const char *hostname = newURL->hostname ? newURL->hostname : curURL->hostname;
  const char *auth = newURL->auth ? newURL->auth : curURL->auth;
  const char *path = (newURL->path && newURL->path[0]) ? newURL->path : curURL->path;
  const char *fname = ((newURL->filename && newURL->filename[0]) || (newURL->path && newURL->path[0])) ?
    newURL->filename : curURL->filename;
  const char *query_string = newURL->query_string;
  char *pathfile = (char*)DpsMalloc(dps_strlen(DPS_NULL2EMPTY(path)) + dps_strlen(DPS_NULL2EMPTY(fname)) +
                                    dps_strlen(DPS_NULL2EMPTY(query_string)) + 5);
  int cascade;
  DPS_MATCH *Alias;
  char *alias = NULL;
  char *type_mark;
  size_t aliassize, nparts = 10;
  DPS_MATCH_PART Parts[10];

  if (newURL->hostinfo == NULL) newURL->charset_id = curURL->charset_id;

  if (pathfile == NULL) return;

  /* pathfile = "/" + path + file name + query string, then normalized. */
  pathfile[0] = '/';
  dps_strcpy(pathfile + 1, DPS_NULL2EMPTY(path));
  dps_strcat(pathfile, DPS_NULL2EMPTY(fname));
  dps_strcat(pathfile, DPS_NULL2EMPTY(query_string));
  DpsURLNormalizePath(pathfile);

  if (!strcasecmp(DPS_NULL2EMPTY(schema), "mailto")
      || !strcasecmp(DPS_NULL2EMPTY(schema), "javascript")
      || !strcasecmp(DPS_NULL2EMPTY(schema), "feed")) {
    /* schema:specific */
    *str = (char*)DpsMalloc(dps_strlen(DPS_NULL2EMPTY(schema)) + dps_strlen(DPS_NULL2EMPTY(newURL->specific)) + 4);
    if (*str == NULL) {
      DPS_FREE(pathfile);
      return;
    }
    dps_strcpy(*str, DPS_NULL2EMPTY(schema));
    dps_strcat(*str, ":");
    dps_strcat(*str, DPS_NULL2EMPTY(newURL->specific));
  } else if (!strcasecmp(DPS_NULL2EMPTY(schema), "htdb")) {
    /* schema:pathfile */
    *str = (char*)DpsMalloc(dps_strlen(DPS_NULL2EMPTY(schema)) + dps_strlen(pathfile) + 4);
    if (*str == NULL) {
      DPS_FREE(pathfile);
      return;
    }
    dps_strcpy(*str, DPS_NULL2EMPTY(schema));
    dps_strcat(*str, ":");
    dps_strcat(*str, pathfile);
  } else {
    /* schema://[auth@]hostname pathfile */
    *str = (char*)DpsMalloc(dps_strlen(DPS_NULL2EMPTY(schema)) + dps_strlen(pathfile) +
                            dps_strlen(DPS_NULL2EMPTY(hostname)) + dps_strlen(DPS_NULL2EMPTY(auth)) + 8);
    if (*str == NULL) {
      DPS_FREE(pathfile);
      return;
    }
    dps_strcpy(*str, DPS_NULL2EMPTY(schema));
    dps_strcat(*str, "://");
    if (auth) {
      dps_strcat(*str, auth);
      dps_strcat(*str, "@");
    }
    dps_strcat(*str, DPS_NULL2EMPTY(hostname));
    dps_strcat(*str, pathfile);
  }

  if (!strncmp(*str, "ftp://", 6) && (strstr(*str, ";type=") != NULL)) {
    type_mark = strstr(*str, ";type");
    *type_mark = '\0';
  }
  DPS_FREE(pathfile);

  if (ReverseAliasFlag) {
    const char *alias_prog = DpsVarListFindStr(&Indexer->Vars, "ReverseAliasProg", NULL);

    if (alias_prog) {
      int result;

      aliassize = 256 + 2 * dps_strlen(*str);
      alias = (char*)DpsRealloc(alias, aliassize);
      if (alias == NULL) {
        DpsLog(Indexer, DPS_LOG_ERROR, "No memory (%d bytes). %s line %d", (int)aliassize, __FILE__, __LINE__);
        goto ret;
      }
      alias[0] = '\0';
      result = DpsAliasProg(Indexer, alias_prog, *str, alias, aliassize - 1);
      DpsLog(Indexer, DPS_LOG_EXTRA, "ReverseAliasProg result: '%s'", alias);
      if (result != DPS_OK) goto ret;
      DPS_FREE(*str);
      *str = (char*)DpsStrdup(alias);
      if (*str == NULL) goto ret;
    }

    for (cascade = 0;
         ((Alias = DpsMatchListFind(&Indexer->Conf->ReverseAliases, *str, nparts, Parts))) && (cascade < 1024);
         cascade++) {
      aliassize = dps_strlen(Alias->arg) + dps_strlen(Alias->pattern) + dps_strlen(*str) + 128;
      alias = (char*)DpsRealloc(alias, aliassize);
      if (alias == NULL) {
        DpsLog(Indexer, DPS_LOG_ERROR, "No memory (%d bytes). %s line %d", (int)aliassize, __FILE__, __LINE__);
        goto ret;
      }
      DpsMatchApply(alias, aliassize, *str, Alias->arg, Alias, nparts, Parts);
      if (!alias[0]) break;
      DpsLog(Indexer, DPS_LOG_DEBUG, "ReverseAlias%d: pattern:%s, arg:%s -> '%s'", cascade, Alias->pattern, Alias->arg, alias);
      DPS_FREE(*str);
      *str = (char*)DpsStrdup(alias);
      if (*str == NULL) goto ret;
      if (Alias->last) break;
    }
  }

ret:
  DPS_FREE(alias);
}
/*
 * Ask searchd for the clones of `Doc` and append them to `Res`.
 *
 * The document's "DP_ID" is sent in a DPS_SEARCHD_CMD_CLONES packet.
 * A DPS_SEARCHD_CMD_DOCINFO reply carries one text-encoded document per
 * line (or the word "nocloneinfo"); each line becomes a new entry of
 * Res->Doc.  A DPS_SEARCHD_CMD_ERROR reply or an unknown command stores a
 * message in Indexer->Conf->errstr.
 *
 * Returns DPS_OK or DPS_ERROR.
 *
 * Fixes: the tokenizer state argument is `&lt` (the source text had it
 * mangled); Res->num_rows is only advanced after the array has grown and is
 * reset to 0 if growing fails, so it never counts rows of a NULL array;
 * allocation failures now yield DPS_ERROR instead of DPS_OK; the unused
 * `nsent` local is gone.
 */
int DpsCloneListSearchd(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_RESULT *Res, DPS_DB *db) {
  DPS_SEARCHD_PACKET_HEADER hdr;
  ssize_t nrecv;
  char *msg = NULL, *dinfo = NULL;
  char *tok, *lt;
  char buf[128];
  int done = 0;
  int rc = DPS_OK;

  TRACE_IN(Indexer, "DpsCloneListSearchd");

  dps_snprintf(buf, 128, "%s", DpsVarListFindStr(&Doc->Sections, "DP_ID", "0"));
  hdr.cmd = DPS_SEARCHD_CMD_CLONES;
  hdr.len = dps_strlen(buf);
  (void)DpsSearchdSendPacket(db->searchd, &hdr, buf);

  while (!done) {
    nrecv = DpsRecvall(db->searchd, &hdr, sizeof(hdr), 360);
    if (nrecv != sizeof(hdr)) {
      DpsLog(Indexer, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes)", (int)nrecv);
      TRACE_OUT(Indexer);
      return(DPS_ERROR);
    }
#ifdef DEBUG_SDP
    DpsLog(Indexer, DPS_LOG_DEBUG, "Received header cmd=%d len=%d\n", hdr.cmd, hdr.len);
#endif

    switch (hdr.cmd) {
    case DPS_SEARCHD_CMD_ERROR:
      msg = (char*)DpsMalloc(hdr.len + 1);
      if (msg == NULL) {
        rc = DPS_ERROR;
        done = 1;
        break;
      }
      nrecv = DpsRecvall(db->searchd, msg, hdr.len, 360);
      msg[(nrecv >= 0) ? nrecv : 0] = '\0';
      sprintf(Indexer->Conf->errstr, "Searchd error: '%s'", msg);
      rc = DPS_ERROR;
      DPS_FREE(msg);
      done = 1;
      break;

    case DPS_SEARCHD_CMD_DOCINFO:
      dinfo = (char*)DpsMalloc(hdr.len + 1);
      if (dinfo == NULL) {
        rc = DPS_ERROR;
        done = 1;
        break;
      }
      nrecv = DpsRecvall(db->searchd, dinfo, hdr.len, 360);
      dinfo[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
      DpsLog(Indexer, DPS_LOG_DEBUG, "Received DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif
      if (strcasecmp(dinfo, "nocloneinfo") != 0) {
        tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);
        while (tok) {
          DPS_DOCUMENT *D;

          /* Same size as before: room for the new row plus one spare. */
          Res->Doc = (DPS_DOCUMENT*)DpsRealloc(Res->Doc, (Res->num_rows + 2) * sizeof(DPS_DOCUMENT));
          if (Res->Doc == NULL) {
            Res->num_rows = 0;
            sprintf(Indexer->Conf->errstr, "Realloc error");
            rc = DPS_ERROR;
            break;
          }
          D = &Res->Doc[Res->num_rows++];
          DpsDocInit(D);
          DpsDocFromTextBuf(D, tok);
          tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
        }
      }
      DPS_FREE(dinfo);
      done = 1;
      break;

    default:
      sprintf(Indexer->Conf->errstr, "Unknown searchd response: cmd=%d len=%d", hdr.cmd, hdr.len);
      rc = DPS_ERROR;
      done = 1;
      break;
    }
  }
  TRACE_OUT(Indexer);
  return rc;
}
/*
 * Fetch document information from searchd for the rows of `Res`.
 *
 * Every document of Res (with WITH_MULTIDBADDR: only those whose dbnum
 * matches cl->dbnum) is serialized with DpsDocToTextBuf and sent, one per
 * line, in a DPS_SEARCHD_CMD_DOCINFO packet.  Before serializing, the
 * "Score" variable of each document gets section = 1.  Each line of the
 * DOCINFO reply is parsed, matched to a row of Res by "DP_ID" and merged
 * into that row.  MESSAGE packets are read and skipped; ERROR packets and
 * unknown commands store a message in query->Conf->errstr.
 *
 * `clnum` is not used by this function.
 *
 * Returns DPS_OK or DPS_ERROR.
 *
 * Fixes: the request buffer `dinfo` is released on every exit path (it
 * leaked whenever the reply was not a DOCINFO packet or an early return was
 * taken); the tokenizer state argument is `&lt` (the source text had it
 * mangled); allocation failures in the receive loop yield DPS_ERROR.
 */
int __DPSCALL DpsResAddDocInfoSearchd(DPS_AGENT * query,DPS_DB *cl,DPS_RESULT * Res,size_t clnum){
  DPS_SEARCHD_PACKET_HEADER hdr;
  char * msg=NULL;
  size_t i;
  int done = 0;
  ssize_t nrecv;
  char * dinfo=NULL;
  int rc=DPS_OK;
  char *textbuf;
  size_t dlen = 0;

  TRACE_IN(query, "DpsResAddDocInfoSearchd");

  if(!Res->num_rows) {
    TRACE_OUT(query);
    return(DPS_OK);
  }

  /* Build the request: one serialized document per "\r\n"-terminated line. */
  for(i=0;i<Res->num_rows;i++){
    size_t ulen;
    size_t olen;
    size_t nsec, r;
    DPS_DOCUMENT *D=&Res->Doc[i];

    r = (size_t) 's';
    for(nsec = 0; nsec < D->Sections.Root[r].nvars; nsec++)
      if (strcasecmp(D->Sections.Root[r].Var[nsec].name, "Score") == 0) D->Sections.Root[r].Var[nsec].section = 1;

#ifdef WITH_MULTIDBADDR
    if (D->dbnum != cl->dbnum) continue;
#endif

    textbuf = DpsDocToTextBuf(D, 1, 0);
    if (textbuf == NULL) {
      DPS_FREE(dinfo);
      TRACE_OUT(query);
      return DPS_ERROR;
    }
    ulen = dps_strlen(textbuf)+2;
    olen = dlen;
    dlen = dlen + ulen;
    dinfo = (char*)DpsRealloc(dinfo, dlen + 1);
    if (dinfo == NULL) {
      DpsFree(textbuf);
      TRACE_OUT(query);
      return DPS_ERROR;
    }
    dinfo[olen] = '\0';
    sprintf(dinfo + olen, "%s\r\n", textbuf);
    DpsFree(textbuf);
  }

  if (dinfo == NULL) {
    TRACE_OUT(query);
    return DPS_OK;
  }

  hdr.cmd=DPS_SEARCHD_CMD_DOCINFO;
  hdr.len = dps_strlen(dinfo);
  (void)DpsSearchdSendPacket(cl->searchd, &hdr, dinfo);
#ifdef DEBUG_SDP
  DpsLog(query, DPS_LOG_ERROR, "Sent DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif

  while(!done){
    char * tok, * lt;

    nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);
    if(nrecv!=sizeof(hdr)){
      DpsLog(query, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes, errno:%d)", (int)nrecv, errno);
      DPS_FREE(dinfo);
      TRACE_OUT(query);
      return(DPS_ERROR);
    }
#ifdef DEBUG_SDP
    DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n",hdr.cmd,hdr.len);
#endif

    switch(hdr.cmd){
    case DPS_SEARCHD_CMD_ERROR:
      msg=(char*)DpsMalloc(hdr.len+1);
      if (msg == NULL) {
        rc = DPS_ERROR;
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
      msg[(nrecv >= 0) ? nrecv : 0]='\0';
      sprintf(query->Conf->errstr,"Searchd error: '%s'",msg);
      rc=DPS_ERROR;
      DPS_FREE(msg);
      done=1;
      break;

    case DPS_SEARCHD_CMD_MESSAGE:
      msg=(char*)DpsMalloc(hdr.len+1);
      if (msg == NULL) {
        rc = DPS_ERROR;
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
      msg[(nrecv >= 0) ? nrecv : 0]='\0';
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
      DPS_FREE(msg);
      break;

    case DPS_SEARCHD_CMD_DOCINFO:
      /* The request buffer is reused for the reply. */
      dinfo = (char*)DpsRealloc(dinfo, hdr.len + 1);
      if (dinfo == NULL) {
        rc = DPS_ERROR;
        done=1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, dinfo, hdr.len, 360);
      dinfo[(nrecv > 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received DOCINFO size=%d buf=%s\n",hdr.len,dinfo);
#endif
      tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);
      while(tok){
        urlid_t Doc_url_id, Res_Doc_url_id;
        DPS_DOCUMENT Doc;

        /* Parse into a scratch document only to learn the DP_ID, then
           merge the same line into the matching row. */
        DpsDocInit(&Doc);
        DpsDocFromTextBuf(&Doc,tok);
        Doc_url_id = (urlid_t)DpsVarListFindInt(&Doc.Sections, "DP_ID", 0);

        for(i=0;i<Res->num_rows;i++){
#ifdef WITH_MULTIDBADDR
          if (Res->Doc[i].dbnum != cl->dbnum) continue;
#endif
          Res_Doc_url_id = (urlid_t)DpsVarListFindInt(&Res->Doc[i].Sections, "DP_ID", 0);
          if (Res_Doc_url_id == Doc_url_id) {
            DpsDocFromTextBuf(&Res->Doc[i], tok);
            break;
          }
        }
        tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
        DpsDocFree(&Doc);
      }
      done=1;
      break;

    default:
      sprintf(query->Conf->errstr,"Unknown searchd response: cmd=%d len=%d",hdr.cmd,hdr.len);
      rc=DPS_ERROR;
      done=1;
      break;
    }
  }

  /* Single release point for the request/reply buffer. */
  DPS_FREE(dinfo);
  TRACE_OUT(query);
  return rc;
}
/*
 * Delete from base `P` every record that `checkrec` does not recognize.
 *
 * For each of the P->NFiles files, the index file is read sequentially;
 * every non-zero record id for which checkrec(P->A, rec_id) returns 0 is
 * collected and then removed with DpsBaseDelete.  The number of deleted
 * records is logged per file and in total.
 *
 * Returns DPS_OK on success or when interrupted by a signal, DPS_ERROR on
 * a seek or allocation failure.
 *
 * Fixes: size_t and urlid_t values are cast to the types the %d / %X log
 * conversions expect, and the id buffer uses a urlid_t* cast instead of
 * int*.
 */
__C_LINK int __DPSCALL DpsBaseCheckup(DPS_BASE_PARAM *P, int (*checkrec) (DPS_AGENT *A, const urlid_t rec_id)) {
  urlid_t i;
  size_t z;
  size_t ndel = 0, mdel = 128, totaldel = 0;
  urlid_t *todel = (urlid_t*)DpsMalloc(mdel * sizeof(urlid_t));

  if (todel == NULL) return DPS_ERROR;

  for (i = 0; i < (urlid_t)P->NFiles; i++) {

    if (have_sigterm || have_sigint || have_sigalrm) {
      DpsLog(P->A, DPS_LOG_EXTRA, "%s signal received. Exiting chackup", (have_sigterm) ? "SIGTERM" :
             (have_sigint) ? "SIGINT" : "SIGALRM");
      DpsBaseClose(P);
      DPS_FREE(todel);
      return DPS_OK;
    }

    P->rec_id = i << DPS_BASE_BITS;
    if (DpsBaseOpen(P, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(P);
      continue;
    }
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
      DpsBaseClose(P);
      DPS_FREE(todel);
      return DPS_ERROR;
    }

    /* Pass 1: collect the ids of records the callback rejects. */
    while (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      if (P->Item.rec_id == 0) continue;
      if (checkrec(P->A, P->Item.rec_id) != 0) continue;

      if (ndel >= mdel) {
        mdel += 128;
        todel = (urlid_t*)DpsRealloc(todel, mdel * sizeof(urlid_t));
        if (todel == NULL) {
          DpsBaseClose(P);
          DpsLog(P->A, DPS_LOG_ERROR, "Can't realloc %d bytes %s:%d", (int)(mdel * sizeof(urlid_t)), __FILE__, __LINE__);
          return DPS_ERROR;
        }
      }
      todel[ndel++] = P->Item.rec_id;
    }
    DpsBaseClose(P);

    /* Pass 2: delete them. */
    for (z = 0; z < ndel; z++) {
      DpsLog(P->A, DPS_LOG_DEBUG, "Base %s/%s store %03X: deleting url_id: %X",
             P->subdir, P->basename, (unsigned int)i, (unsigned int)todel[z]);
      P->rec_id = todel[z];
      DpsBaseDelete(P);
    }
    DpsBaseClose(P);

    DpsLog(P->A, DPS_LOG_INFO, "Base %s/%s store %03X, %d lost records deleted",
           P->subdir, P->basename, (unsigned int)i, (int)ndel);
    totaldel += ndel;
    ndel = 0;
  }

  DPS_FREE(todel);
  DpsLog(P->A, DPS_LOG_EXTRA, "Total lost record(s) deleted: %d\n", (int)totaldel);
  return DPS_OK;
}
#ifdef HAVE_SQL
/*
 * Return a newly allocated copy of `s` (NULL is treated as "") escaped with
 * DpsDBEscStr for use inside a quoted SQL literal, or NULL when out of
 * memory.  The buffer is sized 2 * length + 1, the same ratio the path
 * buffer in DpsCookiesAdd uses.
 */
static char *DpsCookieEscDup(DPS_DB *db, const char *s) {
  const char *src = DPS_NULL2EMPTY(s);
  size_t len = dps_strlen(src);
  char *esc = (char*)DpsMalloc(2 * len + 1);

  if (esc != NULL) {
    esc[0] = '\0';
    (void)DpsDBEscStr(db, esc, src, len);
  }
  return esc;
}
#endif

/*
 * Add a cookie to the agent's in-memory cookie list, or update its value
 * when a cookie with the same domain, path, name and secure flag exists.
 * When `insert_flag` is non-zero the change is also written to the
 * "cookies" SQL table (UPDATE for an existing cookie, INSERT for a new one,
 * preceded by a DELETE when Flags.CheckInsertSQL is set).
 *
 * `expires` is written to the database only; it is not kept in memory.
 * Without HAVE_SQL the function does nothing and returns DPS_OK.
 *
 * Returns DPS_OK, or DPS_ERROR when memory runs out.
 *
 * Fixes:
 *   - domain, name and value are escaped before being placed into SQL text;
 *     previously only the path was escaped although all four come from the
 *     caller unfiltered;
 *   - a NULL `path` is stored as "" instead of being passed to DpsStrdup,
 *     consistent with the DPS_NULL2EMPTY(path) comparisons;
 *   - with WITH_PARANOIA the early "no database" returns now call
 *     DpsViolationExit like every other exit path.
 */
int DpsCookiesAdd(DPS_AGENT *Indexer, const char *domain, const char * path, const char *name, const char *value,
                  const char secure, dps_uint4 expires, const char from_config, int insert_flag) {
#ifdef HAVE_SQL
  char buf[3*PATH_MAX];
  char path_esc[2*PATH_MAX+1];
  char *domain_esc = NULL, *name_esc = NULL, *value_esc = NULL;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  DPS_DB *db;
  dpshash32_t url_id = DpsStrHash32(domain);
  size_t i;
  int locked = 0;
  int rc = DPS_OK;
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  /* Pick the database by domain hash; nothing to do without a database. */
  if (Indexer->flags & DPS_FLAG_UNOCON) {
    if (Indexer->Conf->dbl.nitems == 0) goto done;
    DPS_GETLOCK(Indexer, DPS_LOCK_DB);
    locked = 1;
    db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
  } else {
    if (Indexer->dbl.nitems == 0) goto done;
    db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
  }

  (void)DpsDBEscStr(db, path_esc, DPS_NULL2EMPTY(path), dps_min(PATH_MAX,dps_strlen(DPS_NULL2EMPTY(path))));

  if (insert_flag) {
    domain_esc = DpsCookieEscDup(db, domain);
    name_esc = DpsCookieEscDup(db, name);
    value_esc = DpsCookieEscDup(db, value);
    if (domain_esc == NULL || name_esc == NULL || value_esc == NULL) {
      rc = DPS_ERROR;
      goto done;
    }
  }

  /* Existing cookie: replace the value. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    if (!strcasecmp(Coo->domain, domain) && !strcasecmp(Coo->path, DPS_NULL2EMPTY(path))
        && !strcasecmp(Coo->name, name) && (Coo->secure == secure)) {
      DPS_FREE(Coo->value);
      Coo->value = DpsStrdup(value);
      if (insert_flag) {
        dps_snprintf(buf, sizeof(buf),
                     "UPDATE cookies SET value='%s',expires=%d WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
                     value_esc, expires, domain_esc, path_esc, name_esc, secure);
        DpsSQLAsyncQuery(db, NULL, buf);
      }
      goto done;
    }
  }

  /* New cookie: append it. */
  Cookies->Cookie = (DPS_COOKIE*)DpsRealloc(Cookies->Cookie, (Cookies->ncookies + 1) * sizeof(DPS_COOKIE));
  if(Cookies->Cookie == NULL) {
    Cookies->ncookies = 0;
    rc = DPS_ERROR;
    goto done;
  }
  Coo = &Cookies->Cookie[Cookies->ncookies];
  Coo->secure = secure;
  Coo->from_config = from_config;
  Coo->domain = DpsStrdup(domain);
  Coo->path = DpsStrdup(DPS_NULL2EMPTY(path));
  Coo->name = DpsStrdup(name);
  Coo->value = DpsStrdup(value);

  if (insert_flag) {
    if (Indexer->Flags.CheckInsertSQL) {
      dps_snprintf(buf, sizeof(buf), "DELETE FROM cookies WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
                   domain_esc, path_esc, name_esc, secure);
      DpsSQLAsyncQuery(db, NULL, buf);
    }
    dps_snprintf(buf, sizeof(buf),
                 "INSERT INTO cookies(expires,secure,domain,path,name,value)VALUES(%d,'%c','%s','%s','%s','%s')",
                 expires, secure, domain_esc, path_esc, name_esc, value_esc);
    DpsSQLAsyncQuery(db, NULL, buf);
  }
  Cookies->ncookies++;

done:
  if (locked) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
  DPS_FREE(domain_esc);
  DPS_FREE(name_esc);
  DPS_FREE(value_esc);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif
  return rc;
#else
  return DPS_OK;
#endif /*HAVE_SQL*/
}
static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) { DPS_ENV *Conf = Indexer->Conf; DPS_UINT4URLIDLIST L; size_t k,prev; urlid_t *data = NULL; DPS_UINT4_POS_LEN *ind=NULL; size_t mind=1000,nind=0; char fname[PATH_MAX]; int dat_fd=0, ind_fd=0, rc; const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR); bzero(&L, sizeof(DPS_UINT4URLIDLIST)); rc = DpsLimit4(Indexer, &L, field, type, db); if(rc != DPS_OK) { DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__); goto err1; } if(!L.Item)return(1); if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4); data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data)); if(!data) { fprintf(stderr,"Error1: %s\n",strerror(errno)); goto err1; } ind=(DPS_UINT4_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error2: %s\n",strerror(errno)); goto err1; } prev=0; for(k=0; k<L.nitems; k++) { data[k]=L.Item[k].url_id; if((k==L.nitems-1) || (L.Item[k].val!=L.Item[prev].val)) { if(nind==mind) { mind+=1000; ind=(DPS_UINT4_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error3: %s\n",strerror(errno)); goto err1; } } /* Fill index */ ind[nind].val=L.Item[prev].val; ind[nind].pos = prev * sizeof(*data); if (k == L.nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data); else ind[nind].len = (k - prev) * sizeof(*data); DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len); nind++; prev=k; } } if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } 
dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(dat_fd); if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(dat_fd); DpsClose(dat_fd); DPS_FREE(data); dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(ind_fd); if((nind*sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT4_POS_LEN))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(ind_fd); DpsClose(ind_fd); DPS_FREE(ind); return(0); err1: if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } DPS_FREE(data); DPS_FREE(ind); if(dat_fd) DpsClose(dat_fd); if(ind_fd) DpsClose(ind_fd); return(1); }
/*
 * Load a synonyms file into Env->Synonyms.
 *
 * The file is read into memory and processed line by line:
 *   - lines starting with '#', space, tab, CR or LF are skipped;
 *   - "Charset: <name>"   selects the file charset (required before data);
 *   - "Language: <lang>"  sets the language (required before data);
 *   - "Thesaurus: yes"    makes following lines carry a per-line key in the
 *                         count fields, anything else stores 0 there;
 *   - any other line is split with DpsGetArgs(); every pair of words on the
 *     line is added in both directions, lower-cased and NFC-normalised.
 *
 * Returns DPS_OK on success, DPS_ERROR otherwise (Env->errstr is filled for
 * most failures; a stat() failure is reported on stderr).
 *
 * Fixes relative to the previous version: a "Thesaurus:" line without an
 * argument no longer passes NULL to strncasecmp(); all error paths after the
 * file is opened release the file buffer, the descriptor, the word array and
 * any per-word buffers already allocated for the current line.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
  struct stat sb;
  char *str, *data = NULL, *cur_n = NULL;
  char lang[64]="";
  DPS_CHARSET *cs=NULL;
  DPS_CHARSET *sys_int=DpsGetCharSet("sys-int");
  DPS_CONV file_uni;
  DPS_WIDEWORD *ww = NULL;
  size_t key = 1;
  int flag_th = 0;
  int fd;
  int rc = DPS_ERROR;
  char savebyte = '\0';

  if (stat(filename, &sb)) {
    fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
    return DPS_ERROR;
  }
  if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
    return DPS_ERROR;
  }
  if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", (int)sb.st_size);
    goto done;
  }
  if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
    goto done;
  }
  data[sb.st_size] = '\0';

  /* Isolate the first line: terminate just after its '\n', remembering the byte replaced. */
  str = data;
  cur_n = strchr(str, '\n');
  if (cur_n != NULL) {
    cur_n++;
    savebyte = *cur_n;
    *cur_n = '\0';
  }

  while (str != NULL) {
    if (str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue;

    if (!strncasecmp(str, "Charset:", 8)) {
      char *lasttok;
      char *charset;
      if ((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
	cs = DpsGetCharSet(charset);
	if (!cs) {
	  dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'", charset, filename);
	  goto done;
	}
	DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
      }
    } else if (!strncasecmp(str, "Language:", 9)) {
      char *lasttok;
      char *l;
      if ((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
	dps_strncpy(lang, l, sizeof(lang)-1);
      }
    } else if (!strncasecmp(str, "Thesaurus:", 10)) {
      char *lasttok;
      char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
      /* A missing argument counts as "not yes". */
      flag_th = (tok != NULL && strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
    } else {
      char *av[255];
      size_t ac, i, j;
      dpsunicode_t *t;

      if (!cs) {
	dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
	goto done;
      }
      if (!lang[0]) {
	dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
	goto done;
      }

      ac = DpsGetArgs(str, av, 255);
      if (ac < 2) goto loop_continue;

      if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) goto done;

      /* Convert every word of the line to lower-cased, NFC-normalised unicode. */
      for (i = 0; i < ac; i++) {
	ww[i].word = av[i];
	ww[i].len = dps_strlen(av[i]);
	ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
	if (ww[i].uword == NULL) {
	  for (j = 0; j < i; j++) { DPS_FREE(ww[j].uword); }
	  goto done;
	}
	DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
	DpsUniStrToLower(ww[i].uword);
	ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
	DPS_FREE(t);
      }

      /* Register every unordered pair of words in both directions. */
      for (i = 0; i < ac - 1; i++) {
	for (j = i + 1; j < ac; j++) {
	  DPS_SYNONYM *Syn;

	  /* Two slots are written below, so grow while fewer than two are free. */
	  if ((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms) {
	    Env->Synonyms.msynonyms += 64;
	    Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
	    if (Env->Synonyms.Synonym == NULL) {
	      Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
	      for (i = 0; i < ac; i++) { DPS_FREE(ww[i].uword); }
	      goto done;
	    }
	  }

	  /* Add direct order */
	  Syn = &Env->Synonyms.Synonym[Env->Synonyms.nsynonyms];
	  bzero((void*)Syn, sizeof(DPS_SYNONYM));
	  Syn->p.uword = DpsUniDup(ww[i].uword);
	  Syn->s.uword = DpsUniDup(ww[j].uword);
	  Syn->p.count = Syn->s.count = (size_t)((flag_th) ? key : 0);
	  Env->Synonyms.nsynonyms++;

	  /* Add reverse order */
	  Syn = &Env->Synonyms.Synonym[Env->Synonyms.nsynonyms];
	  bzero((void*)Syn, sizeof(DPS_SYNONYM));
	  Syn->p.uword = DpsUniDup(ww[j].uword);
	  Syn->s.uword = DpsUniDup(ww[i].uword);
	  Syn->p.count = Syn->s.count = (size_t)((flag_th) ? key : 0);
	  Env->Synonyms.nsynonyms++;
	}
      }

      for (i = 0; i < ac; i++) {
	DPS_FREE(ww[i].uword);
      }
      /* Next line key; 0 is skipped on wrap-around because it means "no key". */
      do { key++; } while (key == 0);
    }

  loop_continue:
    /* Advance to the next line: restore the byte overwritten by the terminator and cut again. */
    str = cur_n;
    if (str != NULL) {
      *str = savebyte;
      cur_n = strchr(str, '\n');
      if (cur_n != NULL) {
	cur_n++;
	savebyte = *cur_n;
	*cur_n = '\0';
      }
    }
  }
  rc = DPS_OK;

done:
  DPS_FREE(data);
  DPS_FREE(ww);
  DpsClose(fd);
  return rc;
}
int DpsSEAMake(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *excerpt, const char *content_lang, size_t *indexed_size, size_t *indexed_limit, size_t max_word_len, size_t min_word_len, int crossec, int seasec #ifdef HAVE_ASPELL , int have_speller, AspellSpeller *speller #endif ) { DPS_SENTENCELIST List; DPS_MAPSTAT MapStat; DPS_TEXTITEM Item; DPS_VAR *Sec; dpsunicode_t *sentence, *lt, savec; double *links, *lang_cs, w; double delta, pdiv, cur_div; size_t l, sent_len, order; size_t min_len = 10000000, min_pos = 0; int it; register size_t i, j; #ifdef DEBUG char lcstr[4096]; #endif TRACE_IN(Indexer, "DpsSEAMake"); if((Sec = DpsVarListFind(&Doc->Sections, "sea"))) { /* set SEA section to NULL */ DPS_FREE(Sec->val); DPS_FREE(Sec->txt_val); Sec->curlen = 0; } bzero(&List, sizeof(List)); order = 0; sentence = DpsUniStrTok_SEA((dpsunicode_t*)excerpt->data, <); while(sentence) { if (lt != NULL) { savec = *lt; *lt = 0; } #ifdef DEBUG DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)sentence, sizeof(dpsunicode_t) * (DpsUniLen(sentence) + 1)); fprintf(stderr, "Sentence.%d: %s\n", List.nitems, lcstr); #endif if ((sent_len = DpsUniLen(sentence)) >= Indexer->Flags.SEASentenceMinLength) { j = 1; for (i = 0; i < List.nitems; i++) { if (DpsUniStrCmp(sentence, List.Sent[i].sentence) == 0) { j = 0; break; } } if (j) { if ( List.nitems < Indexer->Flags.SEASentences ) { if (List.nitems == List.mitems) { List.mitems += 16; List.Sent = (DPS_SENTENCE*)DpsRealloc(List.Sent, List.mitems * sizeof(DPS_SENTENCE)); if (List.Sent == NULL) { TRACE_OUT(Indexer); return DPS_ERROR;} } List.Sent[List.nitems].sentence = DpsUniDup(sentence); List.Sent[List.nitems].len = sent_len; List.Sent[List.nitems].order = order++; sentence = DpsUniDup(sentence); DpsUniStrToLower(sentence); bzero(&List.Sent[List.nitems].LangMap, sizeof(DPS_LANGMAP)); DpsBuildLangMap(&List.Sent[List.nitems].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0); if (sent_len < min_len) { min_len = sent_len; min_pos 
= List.nitems; } List.nitems++; DPS_FREE(sentence); } else if (sent_len > min_len) { DPS_FREE(List.Sent[min_pos].sentence); List.Sent[min_pos].sentence = DpsUniDup(sentence); List.Sent[min_pos].len = sent_len; List.Sent[min_pos].order = order++; sentence = DpsUniDup(sentence); DpsUniStrToLower(sentence); bzero(&List.Sent[min_pos].LangMap, sizeof(DPS_LANGMAP)); DpsBuildLangMap(&List.Sent[min_pos].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0); DPS_FREE(sentence); min_len = List.Sent[0].len; min_pos = 0; for(i = 1; i < List.nitems; i++) if (List.Sent[i].len < min_len) { min_len = List.Sent[i].len; min_pos = i; } } } } #ifdef DEBUG fprintf(stderr, "Sent. len.:%d, Min.allowed: %d\n", sent_len, Indexer->Flags.SEASentenceMinLength); #endif if (lt != NULL) *lt = savec; sentence = DpsUniStrTok_SEA(NULL, <); } DpsLog(Indexer, DPS_LOG_DEBUG, "SEA sentences: %d", List.nitems); if (List.nitems < 4) { for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence); DPS_FREE(List.Sent); TRACE_OUT(Indexer); return DPS_OK; } links = (double*)DpsMalloc(sizeof(double) * List.nitems * List.nitems); lang_cs = (double*)DpsMalloc(sizeof(double) * List.nitems); /* k ot links[i * List.nitems + j] */ if (links != NULL && lang_cs != NULL) { for (i = 0; i < List.nitems; i++) { DpsPrepareLangMap(&List.Sent[i].LangMap); } for (i = 0; i < List.nitems; i++) { List.Sent[i].Oi = List.Sent[i].di = 0.5; if (Doc->lang_cs_map == NULL) { links[i * List.nitems + i] = 0.0; } else { MapStat.map = &List.Sent[i].LangMap; DpsCheckLangMap6(Doc->lang_cs_map, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT); links[i * List.nitems + i] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1); } #ifdef DEBUG DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, i, links[i * List.nitems + i], MapStat.hits, MapStat.miss); #endif for (j = 0; j < List.nitems; j++) { if (j == i) continue; MapStat.map = &List.Sent[j].LangMap; 
DpsCheckLangMap6(&List.Sent[j].LangMap, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT); links[i * List.nitems + j] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1); #ifdef DEBUG DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, j, links[i * List.nitems + j], MapStat.hits, MapStat.miss); #endif } } for (l = 0; l < List.nitems; l++) { w = 0.0; for (i = 0; i < List.nitems; i++) { w += links[l * List.nitems + i] * List.Sent[i].Oi; } w = f(w); if (w < LOW_BORDER_EPS2) w = LOW_BORDER_EPS2; else if (w > HI_BORDER_EPS2) w = HI_BORDER_EPS2; List.Sent[l].di = w; } DpsSort(List.Sent, List.nitems, sizeof(DPS_SENTENCE), (qsort_cmp)SentCmp); #ifdef DEBUG DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[0].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[0].sentence) + 1)); fprintf(stderr, "Sent.0: %f %f -- %s\n", List.Sent[0].di, List.Sent[0].Oi, lcstr); DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[1].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[1].sentence) + 1)); fprintf(stderr, "Sent.1: %f %f -- %s\n", List.Sent[1].di, List.Sent[1].Oi, lcstr); DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[2].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[2].sentence) + 1)); fprintf(stderr, "Sent.2: %f %f -- %s\n", List.Sent[2].di, List.Sent[2].Oi, lcstr); DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[3].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[3].sentence) + 1)); fprintf(stderr, "Sent.3: %f %f -- %s\n", List.Sent[3].di, List.Sent[3].Oi, lcstr); DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[4].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[4].sentence) + 1)); fprintf(stderr, "Sent.4: %f %f -- %s\n", List.Sent[4].di, List.Sent[4].Oi, lcstr); #endif DpsSort(List.Sent, TOP_SENTENCES, sizeof(DPS_SENTENCE), (qsort_cmp)SentOrderCmp); bzero(&Item, sizeof(Item)); Item.section = seasec; Item.href = 
NULL; Item.section_name = "sea"; for (i = 0; i < TOP_SENTENCES; i++) { dpsunicode_t *UStr = DpsUniDup(List.Sent[i].sentence); DpsPrepareItem(Indexer, Doc, &Item, List.Sent[i].sentence, UStr, content_lang, indexed_size, indexed_limit, max_word_len, min_word_len, crossec #ifdef HAVE_ASPELL , have_speller, speller, NULL #endif ); DPS_FREE(UStr); } } DPS_FREE(lang_cs); DPS_FREE(links); for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence); DPS_FREE(List.Sent); TRACE_OUT(Indexer); return DPS_OK; }
int main(int argc, char ** argv, char **envp) { const char *env, *bcharset, *lcharset, *conf_dir; char template_name[PATH_MAX+6]=""; char *template_filename = NULL; char *query_string = NULL; char self[1024]=""; char *url = NULL; const char *ResultContentType; int res,httpd=0; size_t catcolumns = 0; int page_size,page_number; DPS_ENV *Env; DPS_AGENT *Agent; DPS_VARLIST query_vars; /* Output Content-type if under HTTPD */ /* Some servers do not pass QUERY_STRING */ /* if the query was empty, so check */ /* REQUEST_METHOD too to be safe */ httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD")); if (!(conf_dir=getenv("DPS_ETC_DIR"))) conf_dir=DPS_CONF_DIR; DpsInit(argc, argv, envp); Env=DpsEnvInit(NULL); if (Env == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Env\n"); exit(0); } DpsVarListInit(&query_vars); Agent = DpsAgentInit(NULL, Env, 0); if (Agent == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Agent\n"); exit(0); } DpsVarListAddEnviron(&Env->Vars,"ENV"); /* Detect self and template name */ if((env = getenv("DPSEARCH_TEMPLATE"))) dps_strncpy(template_name, env, sizeof(template_name) - 1); else if((env = getenv("PATH_INFO")) && env[0]) dps_strncpy(template_name, env + 1, sizeof(template_name) - 1); if((env=getenv("DPSEARCH_SELF"))) dps_strncpy(self,env,sizeof(self)-1); if((env=getenv("QUERY_STRING"))){ query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc query_string\n"); exit(0); } dps_strncpy(query_string, env, dps_strlen(env) + 1); /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. 
Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if((env=getenv("REDIRECT_STATUS"))){ /* Check Apache internal redirect */ /* via "AddHandler" and "Action" */ if(!self[0]){ dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1); } if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } }else{ /* CGI executed without Apache internal redirect */ /* Detect $Self variable with OS independant SLASHES */ if(!self[0]){ dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ char *s,*e; /*This is with OS specific SLASHES */ env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi"); if(strcmp(conf_dir,".")){ /* Take from the config directory */ dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self)); }else{ /* Take from the current directory */ dps_strncpy(template_name,env,sizeof(template_name)-1); } /* Find right slash if it presents */ s=((s=strrchr(template_name,DPSSLASH))?s:template_name); if (*template_filename == '\0') { /* Find .cgi substring */ if ((e = strstr(s, ".cgi")) != NULL) { /* Replace ".cgi" with ".htm" */ e[1]='h';e[2]='t';e[3]='m'; } else { dps_strcat(s, ".htm"); } e = strrchr(s, '/'); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(e + 1); } else { dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 
2); } } } }else{ /* Executed from command line */ /* or under server which does not */ /* pass an empty QUERY_STRING var */ if(argv[1]) { query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q=%s", argv[1]); } else { query_string = (char*)DpsRealloc(query_string, 1024); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q="); } /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } /*// Get template name from command line variable &tmplt */ if(!template_name[0]) dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename); } DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename); DPS_FREE(template_filename); Agent->tmpl.Env_Vars = &Env->Vars; DpsURLNormalizePath(template_name); if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if (strcmp(template_name, "filler.htm")) { /* trying load default 
template */ fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename); if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } else { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } /* set locale if specified */ if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) { setlocale(LC_ALL, url); /*#ifdef HAVE_ASPELL*/ { char *p; if ((p = strchr(url, '.')) != NULL) { *p = '\0'; DpsVarListReplaceStr(&Env->Vars, "g-lc", url); *p = '.'; } } /*#endif*/ url = NULL; } /* Call again to load search Limits if need */ DpsParseQueryString(Agent, &Env->Vars, query_string); Agent->Flags = Env->Flags; Agent->flags |= DPS_FLAG_UNOCON; Env->flags |= DPS_FLAG_UNOCON; DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0)); DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? 
"yes" : "no"), "yes")); DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name); DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR)); DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d", Env->Affixes.naffixes,Env->Spells.nspell, Env->Synonyms.nsynonyms, Env->Acronyms.nacronyms, Env->StopWords.nstopwords); DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords); DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*"); Agent->tmpl.Env_Vars = &Agent->Vars; /* DpsVarListAddEnviron(&Agent->Vars, "ENV");*/ /****************************************************************************************************************************************/ /* This is for query tracking */ DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string); DpsVarListAddStr(&Agent->Vars, "self", self); env = getenv("HTTP_X_FORWARDER_FOR"); if (env) { DpsVarListAddStr(&Agent->Vars, "IP", env); } else { env = getenv("REMOTE_ADDR"); DpsVarListAddStr(&Agent->Vars, "IP", env ? 
env : "localhost"); } bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1"); Env->bcs=DpsGetCharSet(bcharset); lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1"); Env->lcs=DpsGetCharSet(lcharset); ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html"); if(httpd){ if(!Env->bcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); }else if(!Env->lcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); }else{ printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset); } }else{ if(!Env->bcs){ printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); } if(!Env->lcs){ printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); } } /* These parameters taken from "variable section of template"*/ res = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS); page_size = dps_min(res, MAX_PS); page_number = DpsVarListFindInt(&Agent->Vars, "p", 0); if (page_number == 0) { page_number = DpsVarListFindInt(&Agent->Vars, "np", 0); DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1); } else page_number--; res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size; DpsVarListAddInt(&Agent->Vars, "pn", res); catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", "")); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom"); 
DpsVarListFree(&query_vars); DpsAgentFree(Agent); DpsEnvFree(Env); DPS_FREE(query_string); DPS_FREE(url); if (httpd) fflush(NULL); else fclose(stdout); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return DPS_OK; }
static int MakeNestedIndex(DPS_AGENT *Indexer, DPS_UINT8URLIDLIST *L, const char *lim_name, DPS_DB *db) { DPS_ENV *Conf = Indexer->Conf; size_t k, prev; urlid_t *data=NULL; DPS_UINT8_POS_LEN *ind=NULL; size_t mind=1000, nind=0, ndata; char fname[PATH_MAX]; int dat_fd=0, ind_fd=0; int rc=DPS_OK; const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR); if(!L->Item)return(1); if (L->nitems > 1) DpsSort(L->Item, L->nitems, sizeof(DPS_UINT8URLID), (qsort_cmp)cmp_ind8); data = (urlid_t*)DpsMalloc((L->nitems + 1) * sizeof(urlid_t)); if(!data){ DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (L->nitems + 1) * sizeof(urlid_t), __FILE__, __LINE__); goto err1; } ind=(DPS_UINT8_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT8_POS_LEN)); if(!ind){ DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", mind * sizeof(DPS_UINT8_POS_LEN), __FILE__, __LINE__); goto err1; } prev=0; for(k=0; k < L->nitems; k++) { data[k] = L->Item[k].url_id; if((k == L->nitems-1) || (L->Item[k].hi != L->Item[prev].hi) || (L->Item[k].lo != L->Item[prev].lo)) { if(nind==mind){ mind+=1000; ind=(DPS_UINT8_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT8_POS_LEN)); if(!ind) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", mind * sizeof(DPS_UINT8_POS_LEN), __FILE__, __LINE__); goto err1; } } /* Fill index */ ind[nind].hi = L->Item[prev].hi; ind[nind].lo = L->Item[prev].lo; ind[nind].pos = prev * sizeof(*data); if (k == L->nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data); else ind[nind].len = (k - prev) * sizeof(*data); DpsLog(Indexer, DPS_LOG_DEBUG, "%08X%08X - %d %d\n", ind[nind].hi, ind[nind].lo, (int)ind[nind].pos, ind[nind].len); nind++; prev=k; } } ndata = L->nitems; ClearIndex8(L); dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR,DPSSLASH, lim_name); if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't open 
'%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsWriteLock(dat_fd); if((ndata * sizeof(*data)) != (size_t)write(dat_fd, data, ndata * sizeof(*data))) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsUnLock(dat_fd); DpsClose(dat_fd); DPS_FREE(data); dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.ind", vardir, DPSSLASH,DPS_TREEDIR, DPSSLASH, lim_name); if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsWriteLock(ind_fd); if((nind*sizeof(DPS_UINT8_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT8_POS_LEN))){ DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsUnLock(ind_fd); DpsClose(ind_fd); DPS_FREE(ind); return(0); err1: ClearIndex8(L); DPS_FREE(data); DPS_FREE(ind); if(dat_fd) DpsClose(dat_fd); if(ind_fd) DpsClose(ind_fd); return(1); }
/*
 * Append one item (a word or a boolean command, per state->cmd) to the
 * item stack of Res.
 *
 * query - agent; used for logging, the stop-word list and word length limits.
 * Res   - result whose items[] array is extended.
 * state - current parser state: cmd, add_cmd, order, order_inquery, origin,
 *         qlang and the secno stack are read from it.
 * word  - word text, or NULL for a pure command item; duplicated with
 *         DpsStrdup when not NULL.
 * uword - the same word as a dpsunicode_t string, or NULL; duplicated with
 *         DpsUniDup when not NULL.
 *
 * Behaviour visible in this function:
 *  - a word that is in the stop list, or whose length is outside
 *    [min_word_len, max_word_len], gets DPS_WORD_ORIGIN_STOP added to its
 *    origin;
 *  - a word that does not carry DPS_WORD_ORIGIN_QUERY is skipped when an item
 *    with the same order and the same hash is already on the stack;
 *  - OR/AND/NEAR/ANYWORD directly after another of those commands is dropped;
 *  - a word directly after a word or a closing bracket gets an implicit
 *    DPS_STACK_OR inserted before it, and an opening bracket directly after
 *    a closing bracket gets state->add_cmd inserted before it.
 *
 * Returns DPS_OK (also when the item was skipped) or DPS_ERROR when the
 * items array could not be enlarged; in that case Res->nitems and
 * Res->mitems are reset to 0 so they agree with the NULL array.
 */
int DpsAddStackItem(DPS_AGENT *query, DPS_RESULT *Res, DPS_PREPARE_STATE *state, char *word, dpsunicode_t *uword) {
  int origin;
  size_t i;
  size_t wlen = (uword == NULL) ? 0 : DpsUniLen(uword);
  dpshash32_t crcword = (word == NULL) ? 0 : DpsStrHash32(word);
  DPS_STACK_ITEM *item;

#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "0[%d].%x %c -- %s [%x] .secno:%d\n", state->order, state->origin, item_type(state->cmd), (word == NULL) ? "<NULL>" : word, crcword, state->secno[state->p_secno]);
#endif

  if ((uword != NULL) &&
      ( DpsStopListFind(&query->Conf->StopWords, uword, state->qlang) ||
        (query->WordParam.min_word_len > wlen) ||
        (query->WordParam.max_word_len < wlen)) ) {
    origin = state->origin | DPS_WORD_ORIGIN_STOP;
  } else {
    origin = state->origin;
  }

  /* Derived (non-query) words are stored once per (order, hash) pair. */
  if (state->cmd == DPS_STACK_WORD && !(origin & DPS_WORD_ORIGIN_QUERY)) {
    for (i = 0; i < Res->nitems; i++) {
      if ((Res->items[i].order == state->order) && (Res->items[i].crcword == crcword)) return DPS_OK;
    }
  }

  /*
   * Up to two slots may be consumed below (an implicit command plus the item
   * itself).  Written as an addition so the test cannot misfire when mitems
   * is smaller than 2, which the `mitems - 2` form allowed.
   */
  if (Res->nitems + 2 >= Res->mitems) {
    Res->mitems += DPS_MAXSTACK;
    Res->items = (DPS_STACK_ITEM*)DpsRealloc(Res->items, Res->mitems * sizeof(DPS_STACK_ITEM));
    if (Res->items == NULL) {
      DpsLog(query, DPS_LOG_ERROR, "Can't alloc %d bytes for %d mitems",
             (int)(Res->mitems * sizeof(DPS_STACK_ITEM)), (int)Res->mitems);
      /* Keep the counters consistent with the now-NULL array. */
      Res->nitems = Res->mitems = 0;
      return DPS_ERROR;
    }
  }

  if (Res->nitems > 0) {
    const DPS_STACK_ITEM *last = &Res->items[Res->nitems - 1];
    int need_glue = 0;
    int glue_cmd = DPS_STACK_OR;

    /* Two binary operators in a row: silently drop the second one. */
    if ((state->cmd == DPS_STACK_OR || state->cmd == DPS_STACK_AND ||
         state->cmd == DPS_STACK_NEAR || state->cmd == DPS_STACK_ANYWORD) &&
        (last->cmd == DPS_STACK_AND || last->cmd == DPS_STACK_OR ||
         last->cmd == DPS_STACK_NEAR || last->cmd == DPS_STACK_ANYWORD)) {
      return DPS_OK;
    }

    if (state->cmd == DPS_STACK_WORD) {
      /* word after word / ')' / closing phrase quote: implicit OR */
      if (last->cmd == DPS_STACK_WORD || last->cmd == DPS_STACK_RIGHT || last->cmd == DPS_STACK_PHRASE_RIGHT) {
        need_glue = 1;
        glue_cmd = DPS_STACK_OR;
      }
    } else if (state->cmd == DPS_STACK_LEFT) {
      /* '(' after ')' / closing phrase quote: join with the state's add_cmd */
      if (last->cmd == DPS_STACK_RIGHT || last->cmd == DPS_STACK_PHRASE_RIGHT) {
        need_glue = 1;
        glue_cmd = state->add_cmd;
      }
    }

    if (need_glue) {
      item = &Res->items[Res->nitems];
      item->cmd = glue_cmd;
      item->order = 0;
      item->origin = 0;
      item->count = 0;
      item->len = 0;
      item->crcword = 0;
      item->word = NULL;
      item->ulen = 0;
      item->uword = NULL;
      item->pbegin = NULL;
      item->order_origin = 0;
      item->secno = state->secno[state->p_secno];
      Res->nitems++;
      Res->ncmds++;
#ifdef DEBUG_BOOL
      DpsLog(query, DPS_LOG_EXTRA, "1[%d].%x %c -- %s", 0, 0, item_type(glue_cmd), "<NULL>");
#endif
    }
  }

  /* The requested item itself. */
  item = &Res->items[Res->nitems];
  item->cmd = state->cmd;
  item->order = state->order;
  item->order_inquery = state->order_inquery;
  item->origin = origin;
  item->count = 0;
  item->len = (word == NULL) ? 0 : dps_strlen(word);
  item->crcword = crcword;
  item->word = (word == NULL) ? NULL : DpsStrdup(word);
  item->ulen = wlen;
  item->uword = (uword == NULL) ? NULL : DpsUniDup(uword);
  item->pbegin = NULL;
  item->order_origin = 0;
  item->wordnum = Res->nitems;
  item->secno = state->secno[state->p_secno];
  Res->nitems++;

  if (state->cmd != DPS_STACK_WORD) {
    Res->ncmds++;
  } else {
    /* order_origin is accumulated in the slot indexed by the word's order,
       not in the slot of the item just added. */
    Res->items[state->order].order_origin |= origin;
    if (state->order > Res->max_order) Res->max_order = state->order;
    /* Track the maximum of order_inquery itself (previously state->order was
       stored here by mistake). */
    if (state->order_inquery > Res->max_order_inquery) Res->max_order_inquery = state->order_inquery;
  }

#ifdef DEBUG_BOOL
  DpsLog(query, DPS_LOG_EXTRA, "1[%d,%d].%x %c -- %s", state->order, state->order_inquery, state->origin, item_type(state->cmd), (word == NULL) ? "<NULL>" : word);
#endif
  return DPS_OK;
}