示例#1
0
/*
 * Exchange document info with a searchd node.
 *
 * Every document in Res (only those whose dbnum equals cl->dbnum when
 * WITH_MULTIDBADDR is defined) is serialised with DpsDocToTextBuf() and the
 * lines are joined with "\r\n" into one DPS_SEARCHD_CMD_DOCINFO packet sent
 * over cl->searchd.  Replies are then read until a terminating one arrives:
 *   - DPS_SEARCHD_CMD_ERROR   : text copied into query->Conf->errstr, DPS_ERROR;
 *   - DPS_SEARCHD_CMD_MESSAGE : text is read and discarded, keep waiting;
 *   - DPS_SEARCHD_CMD_DOCINFO : each CR/LF separated line is parsed and merged
 *                               into the Res document with the same "DP_ID";
 *   - anything else           : errstr is set, DPS_ERROR.
 *
 * query  - agent used for tracing, logging and error reporting.
 * cl     - database/connection descriptor of the searchd node.
 * Res    - result set whose documents are sent and then updated in place.
 * clnum  - not used by this function.
 *
 * Returns DPS_OK or DPS_ERROR.  The request/response buffer is released on
 * every exit path.
 */
int __DPSCALL DpsResAddDocInfoSearchd(DPS_AGENT * query,DPS_DB *cl,DPS_RESULT * Res,size_t clnum){
	DPS_SEARCHD_PACKET_HEADER hdr;
	char * msg=NULL;
	size_t i; /* num=0,curnum=0;*/
	int done = 0;
	ssize_t nsent,nrecv;
	char * dinfo=NULL;	/* outgoing request, later reused for the DOCINFO reply */
	int	rc=DPS_OK;
	char		*textbuf;
	size_t dlen = 0;	/* bytes of request text accumulated in dinfo */
	
	TRACE_IN(query, "DpsResAddDocInfoSearchd");

	if(!Res->num_rows) { TRACE_OUT(query); return(DPS_OK); }
	
	for(i=0;i<Res->num_rows;i++){
	  size_t		ulen;
	  size_t		olen;
	  size_t		nsec, r;
	  DPS_DOCUMENT	*D=&Res->Doc[i];

	  /* Flag the "Score" variable (looked up in the 's' slot of Sections.Root) with section = 1. */
	  r = (size_t) 's';
	  for(nsec = 0; nsec < D->Sections.Root[r].nvars; nsec++)
	    if (strcasecmp(D->Sections.Root[r].Var[nsec].name, "Score") == 0) D->Sections.Root[r].Var[nsec].section = 1;

#ifdef WITH_MULTIDBADDR
	  if (D->dbnum != cl->dbnum) continue;
#endif		  

	  textbuf = DpsDocToTextBuf(D, 1, 0);
	  if (textbuf == NULL) {
	    DPS_FREE(dinfo);	/* do not leak what was collected for earlier rows */
	    TRACE_OUT(query);
	    return DPS_ERROR;
	  }
					
	  ulen = dps_strlen(textbuf)+2;	/* +2 for the trailing "\r\n" */
	  olen = dlen;
	  dlen = dlen + ulen;
	  dinfo = (char*)DpsRealloc(dinfo, dlen + 1);
	  if (dinfo == NULL) {
	    DpsFree(textbuf);
	    TRACE_OUT(query);
	    return DPS_ERROR;
	  }
	  dinfo[olen] = '\0';
	  sprintf(dinfo + olen, "%s\r\n", textbuf);
	  DpsFree(textbuf);
	}

	/* Nothing was serialised (e.g. no row belongs to this database). */
	if (dinfo == NULL) {
	    TRACE_OUT(query);
	    return DPS_OK;
	}

	hdr.cmd=DPS_SEARCHD_CMD_DOCINFO;
	hdr.len = dps_strlen(dinfo);
	
	/* NOTE(review): the send result is stored but never checked. */
	nsent = DpsSearchdSendPacket(cl->searchd, &hdr, dinfo);
#ifdef DEBUG_SDP
	DpsLog(query, DPS_LOG_ERROR, "Sent DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif				
	
	while(!done){
		char * tok, * lt;
		nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);
		
		if(nrecv!=sizeof(hdr)){
		  DpsLog(query, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes, errno:%d)", (int)nrecv, errno);
			DPS_FREE(dinfo);	/* request buffer is no longer needed */
			TRACE_OUT(query);
			return(DPS_ERROR);
		}else{
#ifdef DEBUG_SDP
		  DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n",hdr.cmd,hdr.len);
#endif
		}
		switch(hdr.cmd){
			case DPS_SEARCHD_CMD_ERROR:
				msg=(char*)DpsMalloc(hdr.len+1); 
				if (msg == NULL) {
				  done = 1;
				  break;
				}
				nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
				msg[(nrecv >= 0) ? nrecv : 0]='\0';
				/* NOTE(review): unbounded sprintf; the size of errstr is not visible here. */
				sprintf(query->Conf->errstr,"Searchd error: '%s'",msg);
				rc=DPS_ERROR;
				DPS_FREE(msg);
				done=1;
				break;
			case DPS_SEARCHD_CMD_MESSAGE:
				msg=(char*)DpsMalloc(hdr.len+1);
				if (msg == NULL) {
				  done = 1;
				  break;
				}
				nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
				msg[(nrecv >= 0) ? nrecv : 0]='\0';
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
				DPS_FREE(msg);
				break;
			case DPS_SEARCHD_CMD_DOCINFO:
				/* Reuse the request buffer for the reply payload. */
				dinfo = (char*)DpsRealloc(dinfo, hdr.len + 1);
				if (dinfo == NULL) {
				  done=1;
				  break;
				}
				nrecv = DpsRecvall(cl->searchd, dinfo, hdr.len, 360);
				dinfo[(nrecv > 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received DOCINFO size=%d buf=%s\n",hdr.len,dinfo);
#endif				
				tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);
				
				while(tok){
				  urlid_t Doc_url_id, Res_Doc_url_id;
					DPS_DOCUMENT Doc;
					
					/* Parse into a scratch document only to learn the DP_ID of this line. */
					DpsDocInit(&Doc);
					DpsDocFromTextBuf(&Doc,tok);
					Doc_url_id = (urlid_t)DpsVarListFindInt(&Doc.Sections, "DP_ID", 0);

					for(i=0;i<Res->num_rows;i++){				
#ifdef WITH_MULTIDBADDR
						if (Res->Doc[i].dbnum != cl->dbnum) continue;
#endif
						Res_Doc_url_id = (urlid_t)DpsVarListFindInt(&Res->Doc[i].Sections, "DP_ID", 0);
						if (Res_Doc_url_id == Doc_url_id) {
						  /* Merge the received line into the first matching result row. */
						  DpsDocFromTextBuf(&Res->Doc[i], tok);
						  break;
						}
					}
					tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
					DpsDocFree(&Doc);
				}
				done=1;
				break;
			default:
				sprintf(query->Conf->errstr,"Unknown searchd response: cmd=%d len=%d",hdr.cmd,hdr.len);
				rc=DPS_ERROR;
				done=1;
				break;
		}
	}
	/* Single release point: covers the DOCINFO, ERROR, unknown-command and
	   allocation-failure exits (DPS_FREE is a no-op on NULL). */
	DPS_FREE(dinfo);
	TRACE_OUT(query);
	return rc;
}
示例#2
0
/*
 * Compute one operation and store result.
 *
 * Pops the operand(s) of operation `com` from the argument stack of `s`,
 * combines their (url_id, coord) lists and pushes the outcome back with
 * PUSHARG().
 *
 *   DPS_STACK_PHRASE_LEFT : filters a single operand in place, keeping only
 *                           runs whose word order/positions match the phrase
 *                           described by Res->items[order_from..order_to].
 *   DPS_STACK_OR          : union, delegated to proceedOR().
 *   DPS_STACK_NEAR        : keeps documents where a pair of positions lies
 *                           within 16 of each other.
 *   DPS_STACK_ANYWORD     : keeps documents where the two operands are
 *                           exactly 2 positions apart in query order.
 *   DPS_STACK_AND         : intersection, or difference when one operand
 *                           carries DPS_STACK_WORD_NOT.
 *   DPS_STACK_NOT         : toggles DPS_STACK_WORD_NOT on the operand.
 *
 * For the binary operations, if only one operand is available it is passed
 * through unchanged; operands flagged DPS_WORD_ORIGIN_STOP are routed to
 * proceedOR()/proceedSTOP().
 *
 * Returns the PUSHARG() result, or DPS_ERROR on allocation/helper failure.
 */
static int perform(DPS_AGENT *query, DPS_RESULT *Res, DPS_BOOLSTACK *s, int com) {
	DPS_STACK_ITEM res, *x1, *x2;
	int rc = DPS_OK, found, flag1;

	bzero(&res, sizeof(res));
	switch(com){
	        case DPS_STACK_PHRASE_LEFT:
		  x1 = POPARG(s);
		  if (x1 == NULL) {
		    bzero(&res, sizeof(res));
		    
		  } else {
		    res = *x1; /* FIXME: add checking ? */
		    if (res.order_from != res.order_to) {
		      DPS_URL_CRD_DB *w;	/* write cursor: accepted entries are compacted to the front */
		      dps_uint4 *pos_real, *order_ideal, *order_real, *gap_ahead/*, *gap_back*/;
		      urlid_t curlid;
		      size_t nwords = res.order_to - res.order_from + 1, nonstop_words;
		      size_t p_cmp, p_ins;	/* ring-buffer read / write indexes (mod nwords) */
		      res.plast = res.pbegin + res.count;
		      w = res.pcur = res.pchecked = res.pbegin;
		      /* One allocation carved into nwords-sized arrays. */
		      if ((pos_real = (dps_uint4*)DpsMalloc(5 * nwords * sizeof(dps_uint4) + 1)) == NULL) {
			DpsLog(query, DPS_LOG_ERROR, "Can't alloc %d bytes %s:%d",(int)(5 * nwords * sizeof(dps_uint4) + 1), __FILE__, __LINE__);
			return DPS_ERROR;
		      }
		      order_real = pos_real + nwords;
		      gap_ahead = order_real + nwords;
		      order_ideal = gap_ahead + nwords;
/*		    gap_back = order_ideal + nwords;*/
		      nonstop_words = 0;
		      /* Expected order of the non-stop words and, for each, the distance
			 to the next non-stop word (1 + number of stop words in between). */
		      { register size_t tt;
			for (tt = res.order_from; tt <= res.order_to; tt++) {
			  if ((Res->items[tt].order_origin & DPS_WORD_ORIGIN_STOP) == 0) {
			    order_ideal[nonstop_words] = tt;
			    gap_ahead[nonstop_words] = (tt == res.order_to) ? 0 : 1;
			    { register size_t zz;
			      for (zz = tt + 1; zz <= res.order_to; zz++) {
				if (Res->items[zz].order_origin & DPS_WORD_ORIGIN_STOP) gap_ahead[nonstop_words]++;
				else break;
			      }
			    }
			    nonstop_words++;
			  }
			}
		      }

#ifdef DEBUG_BOOL
		      DpsLog(query, DPS_LOG_EXTRA, "nonstopwords: %d  nwords:%d", nonstop_words, nwords);
		      { register size_t tt;
			for (tt = 0; tt < nonstop_words; tt++) {
			  DpsLog(query, DPS_LOG_EXTRA, "%d:order_ideal:%d  gap_ahead:%d", tt, order_ideal[tt], gap_ahead[tt]);
			}
		      }
#endif
		      if (nonstop_words != 0) {
			while (res.pcur < res.plast) {
			  register size_t tt;
			  curlid = res.pcur->url_id;
			  found = 0;
			  p_ins = 0;
			  p_cmp = nwords - nonstop_words;
			  res.pchecked = res.pcur; /******* ? *****/
			  for (tt = 0; tt < nwords - nonstop_words; tt++) pos_real[tt] = 0;
			  /* Prime the window with the first nonstop_words distinct positions of this document. */
			  for (tt = 0; (tt < nonstop_words) && (res.pcur < res.plast) && (res.pcur->url_id == curlid) ; tt++) {
			    order_real[p_ins] = Res->WWList.Word[DPS_WRDNUM(res.pcur->coord)].order;
			    pos_real[p_ins] = DPS_WRDPOS(res.pcur->coord);
			    while((res.pcur < res.plast) && (pos_real[p_ins] == DPS_WRDPOS(res.pcur->coord))) res.pcur++;
			    p_ins++; p_cmp++;
			    p_ins %= nwords;
			    p_cmp %= nwords;
			  }
			  if (tt == nonstop_words) {
/* [[[[[[ */
			    found = 1;
			    for (tt = 0; tt < nonstop_words; tt++) {
			      if (order_real[(p_cmp + tt) % nwords] != order_ideal[tt]) {
				found = 0; break;
			      }
			      if (gap_ahead[tt] && (tt + gap_ahead[tt] < nwords) && 
				  (pos_real[(p_cmp + tt) % nwords] + gap_ahead[tt] != pos_real[(p_cmp + tt + 1) % nwords])) {
				found = 0; break;
			      }
			    }
			    if (found) {
			      while((res.pchecked < res.pcur) /*&& (res.pchecked->url_id == curlid)*/) {
				*w = *res.pchecked;
				w++; res.pchecked++;
			      }
			      res.pcur = res.pchecked;
			    } else {
			      res.pchecked = res.pcur;
			    }
/* ]]]]]] */			    
			  }
			  /* Slide the window over the remaining positions of the same document. */
			  while (/*(found == 0) &&*/ (res.pcur < res.plast) && (res.pcur->url_id == curlid)) {
			    order_real[p_ins] = Res->WWList.Word[DPS_WRDNUM(res.pcur->coord)].order;
			    pos_real[p_ins] = DPS_WRDPOS(res.pcur->coord);
			    while((res.pcur < res.plast) && (pos_real[p_ins] == DPS_WRDPOS(res.pcur->coord))) res.pcur++;
			    p_ins++; p_cmp++;
			    p_ins %= nwords;
			    p_cmp %= nwords;
/* [[[[[[ */
			    found = 1;
			    for (tt = 0; tt < nonstop_words; tt++) {
			      if (order_real[(p_cmp + tt) % nwords] != order_ideal[tt]) {
				found = 0; break;
			      }
			      if (gap_ahead[tt] && (tt + gap_ahead[tt] < nwords) && 
				  (pos_real[(p_cmp + tt) % nwords] + gap_ahead[tt] != pos_real[(p_cmp + tt + 1) % nwords])) {
				found = 0; break;
			      }
			    }
			    if (found) {
			      while((res.pchecked < res.pcur) /*&& (res.pchecked->url_id == curlid)*/) {
				*w = *res.pchecked;
				w++; res.pchecked++;
			      }
			      res.pcur = res.pchecked;
			    } else {
			      res.pchecked = res.pcur;
			    }
/* ]]]]]] */
			  }
			
			}
		      }
		      res.count = w - res.pbegin;
		      DPS_FREE(pos_real);
		    }
		  }
#ifdef DEBUG_BOOL
		  DpsLog(query, DPS_LOG_EXTRA,
			 "Perform <{%d}:%d:%d> ->{%d}", x1 ? x1->count : 0, x1 ? x1->order_from : 0, x1 ? x1->order_to : 0, res.count);
#endif
		  rc = PUSHARG(s, &res);
		  break;
		case DPS_STACK_OR:
			x1 = POPARG(s);
			x2 = POPARG(s);
			if (x2 == NULL || x1 == NULL) {
			  /* Only one operand available: pass it through unchanged. */
			  if (x1 != NULL) { res = *x1; x1 = NULL; }
			  if (x2 != NULL) { res = *x2; x2 = NULL; }
			} else {
#ifdef DEBUG_BOOL
/*			  printBoolRes(query, x1);*/
			  DpsLog(query, DPS_LOG_EXTRA, "^^^");
/*			  printBoolRes(query, x2);*/
#endif
			res.order_from = (x1->order_from <= x2->order_from) ? x1->order_from : x2->order_from;
			res.order_to = (x1->order_to >= x2->order_to) ? x1->order_to : x2->order_to;
			
			if (DPS_OK != proceedOR(query, &res, x1, x2)) return DPS_ERROR;
			/* NOTE(review): x1/x2 fields are still read below; this presumes
			   DpsStackItemFree() releases only the item's data, not the item - confirm. */
			DpsStackItemFree(x1); DpsStackItemFree(x2);
			res.count = res.pcur - res.pbegin;
			{ register size_t tt; int x1origin=0, x2origin=0;
			  /* NOTE(review): the values found by these two scans are overwritten
			     by the assignment from x1->origin / x2->origin right after them. */
			  for (tt = x1->order_from; tt <= x1->order_to; tt++) {
#ifdef DEBUG_BOOL
			    DpsLog(query, DPS_LOG_EXTRA, "\t\t\t\tx1order_origin[%d].%x ", tt, Res->items[tt].origin /*order_origin*/);
#endif
			    if (Res->items[tt].origin /*order_origin*/ & DPS_WORD_ORIGIN_STOP) {
			      x1origin = Res->items[tt].origin /*order_origin*/;
			      break;
			    }
			  }
			  for (tt = x2->order_from; tt <= x2->order_to; tt++) {
#ifdef DEBUG_BOOL
			    DpsLog(query, DPS_LOG_EXTRA, "\t\t\t\tx2order_origin[%d].%x ", tt, Res->items[tt].origin /*order_origin*/);
#endif
			    if (Res->items[tt].origin /*order_origin*/ & DPS_WORD_ORIGIN_STOP) {
			      x2origin = Res->items[tt].origin /*order_origin*/;
			      break;
			    }
			  }
			  
			  x1origin = x1->origin; x2origin = x2->origin;
#ifdef DEBUG_BOOL
			  DpsLog(query, DPS_LOG_EXTRA, "\t\t\t\tx1origin.%x x2origin.%x", x1origin, x2origin);
#endif


			  if ((x1origin & (DPS_WORD_ORIGIN_STOP | DPS_WORD_ORIGIN_QUERY)) == (DPS_WORD_ORIGIN_STOP|DPS_WORD_ORIGIN_QUERY)) {
			    res.origin = x1origin;
			  } else
			  if ((x2origin & (DPS_WORD_ORIGIN_STOP | DPS_WORD_ORIGIN_QUERY)) == (DPS_WORD_ORIGIN_STOP|DPS_WORD_ORIGIN_QUERY)) {
			    res.origin = x2origin;
			  } else
			  if (((x1origin & DPS_WORD_ORIGIN_STOP) && (x2origin & DPS_WORD_ORIGIN_STOP)) ||
			      ( (res.count == 0) && 
				((x1origin & DPS_WORD_ORIGIN_STOP) || (x2origin & DPS_WORD_ORIGIN_STOP))))
			    res.origin = DPS_WORD_ORIGIN_STOP;
			  /* Result is an acronym only when BOTH operands are (was testing x1origin twice). */
			  if ((x1origin & DPS_WORD_ORIGIN_ACRONYM) && (x2origin & DPS_WORD_ORIGIN_ACRONYM)) 
			    res.origin |= DPS_WORD_ORIGIN_ACRONYM;
			}
			}
#ifdef DEBUG_BOOL
			DpsLog(query, DPS_LOG_EXTRA, "Perform {%d}.%x | {%d}.%x -> {%d}.%x",
			       (x1) ? x1->count:-1, (x1)?x1->origin:-1, (x2)?x2->count : -1, (x2) ? x2->origin : -1, res.count, res.origin);
/*			printBoolRes(query, &res);*/
			DpsLog(query, DPS_LOG_EXTRA, "===");
#endif
			rc = PUSHARG(s, &res);
			break;
	        case DPS_STACK_NEAR:
			x1 = POPARG(s);
			x2 = POPARG(s);
			if (x2 == NULL || x1 == NULL) {
			  if (x1 != NULL) { res = *x1; x1 = NULL; }
			  if (x2 != NULL) { res = *x2; x2 = NULL; }
			} else {
			  res.order_from = (x1->order_from <= x2->order_from) ? x1->order_from : x2->order_from;
			  res.order_to = (x1->order_to >= x2->order_to) ? x1->order_to : x2->order_to;
			  if ((x1->origin & DPS_WORD_ORIGIN_STOP) && (x2->origin & DPS_WORD_ORIGIN_STOP) ) {
			    if (DPS_OK != proceedOR(query, &res, x1, x2)) return DPS_ERROR;
			    res.origin = DPS_WORD_ORIGIN_STOP;
			  } else if (x2->origin & DPS_WORD_ORIGIN_STOP) {
			    if (DPS_OK != proceedSTOP(query, &res, x1, x2)) return DPS_ERROR;
			  } else if (x1->origin & DPS_WORD_ORIGIN_STOP ) {
			    if (DPS_OK != proceedSTOP(query, &res, x2, x1)) return DPS_ERROR;
			  } else if (!((x1->cmd & DPS_STACK_WORD_NOT) && (x2->cmd & DPS_STACK_WORD_NOT))) {
#ifdef DEBUG_BOOL
/*			    printBoolRes(query, x1);*/
			    DpsLog(query, DPS_LOG_EXTRA, "^^^");
			    DpsLog(query, DPS_LOG_DEBUG, "x1.NOT: %d  x2.NOT: %d", x1->cmd & DPS_STACK_WORD_NOT, x2->cmd & DPS_STACK_WORD_NOT);
/*			    printBoolRes(query, x2);*/
#endif
			    res.pbegin = res.pcur = (DPS_URL_CRD_DB*)DpsMalloc((x1->count + x2->count + 1) * sizeof(DPS_URL_CRD_DB));
			    if (res.pbegin == NULL) return DPS_ERROR;
			    x1->pcur = x1->pbegin; x1->plast = x1->pbegin + x1->count;
			    x2->pcur = x2->pbegin; x2->plast = x2->pbegin + x2->count;
			    if (x1->cmd & DPS_STACK_WORD_NOT) {
			      register DPS_STACK_ITEM *t = x1;
			      x1 = x2; x2 = t;
			    }
			    while (x1->pcur < x1->plast && x2->pcur < x2->plast) {
			      while ((x2->pcur < x2->plast) && (x2->pcur->url_id < x1->pcur->url_id)) x2->pcur++;
			      if (x2->pcur >= x2->plast) break;
			      if (x2->pcur->url_id == x1->pcur->url_id) {
				dps_uint4 pos1 = DPS_WRDPOS(x1->pcur->coord);
				dps_uint4 pos2 = DPS_WRDPOS(x2->pcur->coord);
				register urlid_t curlid = x1->pcur->url_id;
				/* "Near" means the two positions are at most 16 apart. */
				if (pos1 > pos2) { found = ((pos2 + 16) >= pos1);
				} else { found = ((pos1 + 16) >= pos2);
				}
				x1->pchecked = x1->pcur; x2->pchecked = x2->pcur;
				while ((!found) && (x1->pchecked < x1->plast) && (x2->pchecked < x2->plast) 
				       /*&& (x1->pchecked->url_id == x2->pchecked->url_id)*/ ) {
				  /* Advance the side with the smaller coord; stop before touching
				     an entry past the end or belonging to another document. */
				  if (x1->pchecked->coord <= x2->pchecked->coord) {
				    x1->pchecked++;
				    if ((x1->pchecked >= x1->plast) || (x1->pchecked->url_id != curlid)) break;
				    pos1 = DPS_WRDPOS(x1->pchecked->coord);
				  } else {
				    x2->pchecked++;
				    if ((x2->pchecked >= x2->plast) || (x2->pchecked->url_id != curlid)) break;
				    pos2 = DPS_WRDPOS(x2->pchecked->coord);
				  }
				  if (pos1 > pos2) { found = ((pos2 + 16) >= pos1);
				  } else { found = ((pos1 + 16) >= pos2);
				  }
				}
				if (x2->cmd & DPS_STACK_WORD_NOT || x1->cmd & DPS_STACK_WORD_NOT) found = !found;
				if (found) {

				  /* Merge all entries of this document from both lists, ordered by coord. */
				  while ((x1->pcur < x1->plast) && (x2->pcur < x2->plast) /*&& (x1->pcur->url_id == x2->pcur->url_id)*/) {
				    if (x1->pcur->coord <= x2->pcur->coord) {
				      *res.pcur = *x1->pcur;
				      res.pcur++; x1->pcur++;
				      if ((x1->pcur >= x1->plast) || (x1->pcur->url_id != curlid)) break;
				    } else {
				      *res.pcur = *x2->pcur;
				      res.pcur++; x2->pcur++;
				      if ((x2->pcur >= x2->plast) || (x2->pcur->url_id != curlid)) break;
				    }
				  }
				  while ((x1->pcur < x1->plast) && (x1->pcur->url_id == curlid)) {
				    *res.pcur = *x1->pcur;
				    res.pcur++; x1->pcur++;
				  }
				  while ((x2->pcur < x2->plast) && (x2->pcur->url_id == curlid)) {
				    *res.pcur = *x2->pcur;
				    res.pcur++; x2->pcur++;
				  }

				} else {
				  /* Not matched: skip the rest of this document in both lists. */
				  x1->pcur = x1->pchecked; x2->pcur = x2->pchecked;
				  while ((x1->pcur < x1->plast) && (x1->pcur->url_id == curlid)) x1->pcur++;
				  while ((x2->pcur < x2->plast) && (x2->pcur->url_id == curlid)) x2->pcur++;
				}
			      } else {
				/* x2 is ahead of x1: swap roles so the lagging list is advanced next. */
				register DPS_STACK_ITEM *t = x1;
				x1 = x2; x2 = t;
			      }
			    }
			  }
			}
			DpsStackItemFree(x1); DpsStackItemFree(x2);
			res.count = res.pcur - res.pbegin;
#ifdef DEBUG_BOOL
			DpsLog(query, DPS_LOG_EXTRA,"Perform {%d}.%x NEAR {%d}.%x - > %d.%d", 
			       (x1)?x1->count:-1, (x1)?x1->origin:-1, (x2) ? x2->count : -1, (x2) ? x2->origin: - 1, res.count, res.origin);
/*			printBoolRes(query, &res);*/
			DpsLog(query, DPS_LOG_EXTRA, "===");
#endif
			rc = PUSHARG(s, &res);
			break;
	        case DPS_STACK_ANYWORD:
			x1 = POPARG(s);
			x2 = POPARG(s); flag1 = 0;	/* flag1 tracks whether x1/x2 are currently swapped */
			if (x2 == NULL || x1 == NULL) {
			  if (x1 != NULL) { res = *x1; x1 = NULL; }
			  if (x2 != NULL) { res = *x2; x2 = NULL; }
			} else {
			  res.order_from = (x1->order_from <= x2->order_from) ? x1->order_from : x2->order_from;
			  res.order_to = (x1->order_to >= x2->order_to) ? x1->order_to : x2->order_to;
			  if ((x1->origin & DPS_WORD_ORIGIN_STOP) && (x2->origin & DPS_WORD_ORIGIN_STOP) ) {
			    if (DPS_OK != proceedOR(query, &res, x1, x2)) return DPS_ERROR;
			    res.origin = DPS_WORD_ORIGIN_STOP;
			  } else if (x2->origin & DPS_WORD_ORIGIN_STOP) {
			    if (DPS_OK != proceedSTOP(query, &res, x1, x2)) return DPS_ERROR;
			  } else if (x1->origin & DPS_WORD_ORIGIN_STOP ) {
			    if (DPS_OK != proceedSTOP(query, &res, x2, x1)) return DPS_ERROR;
			  } else if (!((x1->cmd & DPS_STACK_WORD_NOT) && (x2->cmd & DPS_STACK_WORD_NOT))) {
			    res.pbegin = res.pcur = (DPS_URL_CRD_DB*)DpsMalloc((x1->count + x2->count + 1) * sizeof(DPS_URL_CRD_DB));
			    if (res.pbegin == NULL) return DPS_ERROR;
			    x1->pcur = x1->pbegin; x1->plast = x1->pbegin + x1->count;
			    x2->pcur = x2->pbegin; x2->plast = x2->pbegin + x2->count;
			    if (x1->cmd & DPS_STACK_WORD_NOT) {
			      register DPS_STACK_ITEM *t = x1;
			      x1 = x2; x2 = t; flag1 = !flag1;
			    }
			    while (x1->pcur < x1->plast && x2->pcur < x2->plast) {
			      while ((x2->pcur < x2->plast) && (x2->pcur->url_id < x1->pcur->url_id)) x2->pcur++;
			      if (x2->pcur >= x2->plast) break;
			      if (x2->pcur->url_id == x1->pcur->url_id) {
				dps_int4 pos1 = (dps_int4)DPS_WRDPOS(x1->pcur->coord);
				dps_int4 pos2 = (dps_int4)DPS_WRDPOS(x2->pcur->coord);
				register urlid_t curlid = x1->pcur->url_id;
				/* Match when the operands are exactly two positions apart, in query order. */
				found = ((flag1) ? ((pos1 + 2) == pos2) : ((pos2 + 2) == pos1));
				x1->pchecked = x1->pcur; x2->pchecked = x2->pcur;
				while ((!found) && (x1->pchecked < x1->plast) && (x2->pchecked < x2->plast) 
				       && (x1->pchecked->url_id == x2->pchecked->url_id)) {
				  /* Never read an entry past the end of a list, and never compare
				     positions taken from two different documents. */
				  if (x1->pchecked->coord <= x2->pchecked->coord) {
				    x1->pchecked++;
				    if ((x1->pchecked >= x1->plast) || (x1->pchecked->url_id != curlid)) break;
				    pos1 = (dps_int4)DPS_WRDPOS(x1->pchecked->coord);
				  } else {
				    x2->pchecked++;
				    if ((x2->pchecked >= x2->plast) || (x2->pchecked->url_id != curlid)) break;
				    /* pos2 follows x2's cursor (was read from x1->pchecked). */
				    pos2 = (dps_int4)DPS_WRDPOS(x2->pchecked->coord);
				  }
				  found = ((flag1) ? ((pos1 + 2) == pos2) : ((pos2 + 2) == pos1));
				}
				if (x2->cmd & DPS_STACK_WORD_NOT || x1->cmd & DPS_STACK_WORD_NOT) found = !found;
				if (found) {
				  while ((x1->pcur < x1->plast) && (x2->pcur < x2->plast) && (x1->pcur->url_id == x2->pcur->url_id)) {
				    if (x1->pcur->coord <= x2->pcur->coord) {
				      *res.pcur = *x1->pcur;
				      res.pcur++; x1->pcur++;
				    } else {
				      *res.pcur = *x2->pcur;
				      res.pcur++; x2->pcur++;
				    }
				  }
				  while ((x1->pcur < x1->plast) && (x1->pcur->url_id == curlid)) {
				    *res.pcur = *x1->pcur;
				    res.pcur++; x1->pcur++;
				  }
				  while ((x2->pcur < x2->plast) && (x2->pcur->url_id == curlid)) {
				    *res.pcur = *x2->pcur;
				    res.pcur++; x2->pcur++;
				  }
				} else {
				  x1->pcur = x1->pchecked; x2->pcur = x2->pchecked;
				  while ((x1->pcur < x1->plast) && (x1->pcur->url_id == curlid)) x1->pcur++;
				  while ((x2->pcur < x2->plast) && (x2->pcur->url_id == curlid)) x2->pcur++;
				}
			      } else {
				register DPS_STACK_ITEM *t = x1;
				x1 = x2; x2 = t; flag1 = !flag1;
			      }
			    }
			  }
			}
			DpsStackItemFree(x1); DpsStackItemFree(x2);
			res.count = res.pcur - res.pbegin;
#ifdef DEBUG_BOOL
			DpsLog(query, DPS_LOG_EXTRA, "Perform {%d} ANYWORD {%d} - > %d", 
			       (x1) ? x1->count : -1, (x2) ? x2->count : -1, res.count);
#endif
			rc = PUSHARG(s, &res);
			break;
		case DPS_STACK_AND:
			x1 = POPARG(s);
			x2 = POPARG(s);
			if (x2 == NULL || x1 == NULL) {
			  if (x1 != NULL) { res = *x1; x1 = NULL; }
			  if (x2 != NULL) { res = *x2; x2 = NULL; }
			} else {
			  res.order_from = (x1->order_from <= x2->order_from) ? x1->order_from : x2->order_from;
			  res.order_to = (x1->order_to >= x2->order_to) ? x1->order_to : x2->order_to;
			  if ((x1->origin & DPS_WORD_ORIGIN_STOP) && (x2->origin & DPS_WORD_ORIGIN_STOP) ) {
			    if (DPS_OK != proceedOR(query, &res, x1, x2)) return DPS_ERROR;
			    res.origin = DPS_WORD_ORIGIN_STOP;
			  } else if (x2->origin & DPS_WORD_ORIGIN_STOP) {
			    if (DPS_OK != proceedSTOP(query, &res, x1, x2)) return DPS_ERROR;
			  } else if (x1->origin & DPS_WORD_ORIGIN_STOP ) {
			    if (DPS_OK != proceedSTOP(query, &res, x2, x1)) return DPS_ERROR;
			  } else if (!((x1->cmd & DPS_STACK_WORD_NOT) && (x2->cmd & DPS_STACK_WORD_NOT))) {
			    res.pbegin = res.pcur = (DPS_URL_CRD_DB*)DpsMalloc((x1->count + x2->count + 1) * sizeof(DPS_URL_CRD_DB));
			    if (res.pbegin == NULL) return DPS_ERROR;
			    x1->pcur = x1->pbegin; x1->plast = x1->pbegin + x1->count;
			    x2->pcur = x2->pbegin; x2->plast = x2->pbegin + x2->count;
			    /* Make sure a negated operand, if any, ends up as x2. */
			    if (x1->cmd & DPS_STACK_WORD_NOT) {
			      register DPS_STACK_ITEM *t = x1;
			      x1 = x2; x2 = t;
			    }
			    if (x2->cmd & DPS_STACK_WORD_NOT) {
			      /* x1 AND NOT x2: keep x1 entries whose url_id is absent from x2. */
			      while (x1->pcur < x1->plast && x2->pcur < x2->plast) {
				while ((x1->pcur < x1->plast) && (x1->pcur->url_id < x2->pcur->url_id)) {
				  *res.pcur = *x1->pcur;
				  res.pcur++; x1->pcur++;
				}
				if (x1->pcur >= x1->plast) break;	/* x1 exhausted: nothing left to keep */
				while ((x2->pcur < x2->plast) && (x2->pcur->url_id < x1->pcur->url_id)) x2->pcur++;
				if (x2->pcur >= x2->plast) break;	/* x2 exhausted: the tail of x1 is copied below */
			    
				if (x2->pcur->url_id == x1->pcur->url_id) {
				  register urlid_t curlid = x1->pcur->url_id;
				  while ((x1->pcur < x1->plast) && (x1->pcur->url_id == curlid)) x1->pcur++;
				  while ((x2->pcur < x2->plast) && (x2->pcur->url_id == curlid)) x2->pcur++;
				}
			      }
			      while (x1->pcur < x1->plast) {
				*res.pcur = *x1->pcur;
				res.pcur++; x1->pcur++;
			      }
			    } else {
#if 0
			      {
				DPS_URL_CRD_DB *w;
				for (w = x1->pcur; w < x1->plast; w++) {
				  fprintf(stderr, "x1.url_id:%d  .coord:%d\n", w->url_id, w->coord);
				}
				for (w = x2->pcur; w < x2->plast; w++) {
				  fprintf(stderr, "x2.url_id:%d  .coord:%d\n", w->url_id, w->coord);
				}
			      }
#endif

			      /* Plain AND: for every url_id present in both lists, merge their entries by coord. */
			      while (x1->pcur < x1->plast && x2->pcur < x2->plast) {
				while ((x2->pcur < x2->plast) && (x2->pcur->url_id < x1->pcur->url_id)) x2->pcur++;
				if (x2->pcur >= x2->plast) break;
				if (x2->pcur->url_id == x1->pcur->url_id) {
				  register urlid_t curlid = x1->pcur->url_id;
				  while ((x1->pcur < x1->plast) && (x2->pcur < x2->plast) && (x1->pcur->url_id == x2->pcur->url_id)) {
				    if (x1->pcur->coord <= x2->pcur->coord) {
				      *res.pcur = *x1->pcur;
				      res.pcur++; x1->pcur++;
				    } else {
				      *res.pcur = *x2->pcur;
				      res.pcur++; x2->pcur++;
				    }
				  }
				  while ((x1->pcur < x1->plast) && (x1->pcur->url_id == curlid)) {
				    *res.pcur = *x1->pcur;
				    res.pcur++; x1->pcur++;
				  }
				  while ((x2->pcur < x2->plast) && (x2->pcur->url_id == curlid)) {
				    *res.pcur = *x2->pcur;
				    res.pcur++; x2->pcur++;
				  }
				} else {
				  register DPS_STACK_ITEM *t = x1;
				  x1 = x2; x2 = t;
				}
			      }
			    }
			  }
			}
			DpsStackItemFree(x1); DpsStackItemFree(x2);
			res.count = res.pcur - res.pbegin;
#ifdef DEBUG_BOOL
#if 0
			{
			  DPS_URL_CRD_DB *w = res.pbegin;
			  size_t q;
			  for (q = 0; q < res.count; q++) {
			    fprintf(stderr, "res.url_id:%d  .coord:%d\n", w[q].url_id, w[q].coord);
			  }
			}
#endif
			DpsLog(query, DPS_LOG_EXTRA, "Perform {%d}.%x & {%d}.%x - > {%d}.%x", 
	       (x1) ? x1->count : -1, (x1) ? x1->origin : -1, (x2) ? x2->count : -1 , (x2) ? x2->origin : -1, res.count, res.origin);
/*			printBoolRes(query, &res);*/
			DpsLog(query, DPS_LOG_EXTRA, "===");
#endif
			rc = PUSHARG(s, &res);
			break;
		case DPS_STACK_NOT:
		        x1 = POPARG(s);
			/* res = x1 ? 0 : 1; */
			if (x1 != NULL) {
			  /* Negation is lazy: just flip the flag, the binary operations interpret it. */
			  x1->cmd ^= DPS_STACK_WORD_NOT;
			  rc = PUSHARG(s, x1);
			}
#ifdef DEBUG_BOOL
			DpsLog(query, DPS_LOG_EXTRA, "Perform ~ {%d}", (x1) ? x1->count : -1);
#endif
			break;
	}
	return rc;
}
示例#3
0
/*
 * Split a raw HTTP response held in Doc->Buf into header and body and store
 * the parsed header data in Doc->Sections.
 *
 * Side effects visible here:
 *   - Doc->Buf.content is set to the first byte after the blank line that
 *     ends the header (the separator itself is overwritten with '\0'), or
 *     left NULL when no body start could be determined;
 *   - "ResponseSize" is set to Doc->Buf.size; "Content-Length" and
 *     "Last-Modified" are removed before parsing;
 *   - "ResponseLine" and "Status" are set from the "HTTP/..." status line,
 *     "Status" keeping the larger of the previous and the new value;
 *   - every header (continuation lines are joined to the preceding header)
 *     is passed to DpsParseHTTPHeader();
 *   - when a body was found, "Content-Length" is replaced with the body size
 *     plus whatever "Content-Length" holds after header parsing.
 *
 * Indexer - agent handed through to DpsParseHTTPHeader().
 * Doc     - document whose buffer is parsed and whose sections are updated.
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {			
  char	*token, *lt, *headers, savec;
	int     oldstatus;
	DPS_DSTR header;
	
	Doc->Buf.content=NULL;
	oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
	DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
	DpsVarListDel(&Doc->Sections, "Content-Length");
	DpsVarListDel(&Doc->Sections, "Last-Modified");

	if (Doc->Buf.buf == NULL) return;

	/* Cut HTTP response header first        */
	for(token=Doc->Buf.buf;*token;token++){
	  if(!strncmp(token,"\r\n\r\n",4)){
	    if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
			*token='\0';
			Doc->Buf.content = token + 4;
	    }
	    break;
	  } else if(!strncmp(token,"\n\n",2)){
	    if (token <= Doc->Buf.buf + Doc->Buf.size - 2) {
			*token='\0';
			Doc->Buf.content = token + 2;
	    }
	    break;
	  }
	}
	
	/* No separator was cut above.  If the scan stopped at least 4 bytes
	   before the end of the buffer, guess the body start from the byte two
	   positions further on.
	   NOTE(review): presumably handles a buffer already cut by an earlier
	   pass - confirm. */
	if(!Doc->Buf.content) {
	  if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
	    if (token[2] == CR_CHAR) Doc->Buf.content = token + 4;
	    else Doc->Buf.content = token + 2;
	  }
	}
	
	/* Copy headers not to break them */
	headers = (char*)DpsStrdup(Doc->Buf.buf);
	if (headers == NULL) return;	/* out of memory: nothing can be parsed */
	
	/* Now lets parse response header lines */
	token = dps_strtok_r(headers, "\r\n", &lt, &savec);
	
	if(!token) {
	  DpsFree(headers);	/* empty header: release the copy before leaving */
	  return;
	}
	
	if(!strncmp(token,"HTTP/",5)){
		/* The numeric code is read from offset 8 ("HTTP/x.y" is 8 characters);
		   a shorter line carries no code and must not be read past its end. */
		int	status = (strlen(token) > 8) ? atoi(token + 8) : 0;
		DpsVarListReplaceStr(&Doc->Sections,"ResponseLine",token);
		DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status );
	}else{
	        DpsFree(headers);
		return;
	}
	token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
	DpsDSTRInit(&header, 128);
	
	while(token){
	
		/* A line containing ':' starts a new header: flush the one collected
		   so far.  Lines without ':' are appended to the current header. */
		if(strchr(token,':')) {

		  if (header.data_size) {
		    DpsParseHTTPHeader(Indexer, Doc, &header);
		    DpsDSTRFree(&header);
		    DpsDSTRInit(&header, 128);
		  }

		}
		DpsDSTRAppendStr(&header, token);

		token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
	}
	if (header.data_size) {
	  DpsParseHTTPHeader(Indexer, Doc, &header);
	}
	DpsDSTRFree(&header);
	DPS_FREE(headers);
	
	/* Bad response, return */
	if(!Doc->Buf.content) {
	    return;
	}
	/* (buf - content + size) is the number of bytes following the header. */
	DpsVarListReplaceInt(&Doc->Sections,"Content-Length", Doc->Buf.buf-Doc->Buf.content+(int)Doc->Buf.size + DpsVarListFindInt(&Doc->Sections,"Content-Length", 0));
}
示例#4
0
/*
 * Build the "sea" section of a document from the most representative
 * sentences of `excerpt`.
 *
 * Steps performed here:
 *   1. Any existing "sea" section value is cleared.
 *   2. The excerpt is split with DpsUniStrTok_SEA(); distinct sentences of at
 *      least Indexer->Flags.SEASentenceMinLength characters are collected, at
 *      most Indexer->Flags.SEASentences of them - once full, a longer sentence
 *      replaces the currently shortest one.  A language map is built from the
 *      lower-cased text of each kept sentence.
 *   3. With fewer than 4 sentences nothing more is done.
 *   4. A nitems x nitems link matrix is filled from DpsCheckLangMap6() hit
 *      counts, each sentence gets a weight di = f(sum(link * Oi)) clamped to
 *      [LOW_BORDER_EPS2, HI_BORDER_EPS2], and the list is sorted with SentCmp.
 *   5. The leading TOP_SENTENCES sentences (never more than were collected)
 *      are re-sorted with SentOrderCmp and indexed through DpsPrepareItem()
 *      as section `seasec` named "sea".
 *
 * The remaining parameters are passed through to DpsPrepareItem() unchanged.
 *
 * Returns DPS_OK, or DPS_ERROR if the sentence list cannot be grown.
 */
int DpsSEAMake(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *excerpt,  
	       const char *content_lang, size_t *indexed_size, size_t *indexed_limit, 
	       size_t max_word_len, size_t min_word_len, int crossec, int seasec
#ifdef HAVE_ASPELL
	       , int have_speller, AspellSpeller *speller
#endif
	       ) {
  DPS_SENTENCELIST List;
  DPS_MAPSTAT MapStat;
  DPS_TEXTITEM Item;
  DPS_VAR	*Sec;
  dpsunicode_t *sentence, *lt, savec;
  double *links, *lang_cs, w;
  size_t l, sent_len, order;
  size_t min_len = 10000000, min_pos = 0;	/* length / index of the shortest kept sentence */
  size_t top;					/* number of sentences actually emitted */
  register size_t i, j;
#ifdef DEBUG
  char lcstr[4096];

#endif

  TRACE_IN(Indexer, "DpsSEAMake");

  if((Sec = DpsVarListFind(&Doc->Sections, "sea"))) { /* set SEA section to NULL */
    DPS_FREE(Sec->val);
    DPS_FREE(Sec->txt_val);
    Sec->curlen = 0;
  }
  
  bzero(&List, sizeof(List));
  order = 0;
  sentence = DpsUniStrTok_SEA((dpsunicode_t*)excerpt->data, &lt);
  while(sentence) {
    /* Temporarily terminate the sentence in place; the character is restored below. */
    if (lt != NULL) { savec = *lt; *lt = 0; }
#ifdef DEBUG
    DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)sentence, sizeof(dpsunicode_t) * (DpsUniLen(sentence) + 1));
    fprintf(stderr, "Sentence.%d: %s\n", List.nitems, lcstr);
#endif
    if ((sent_len = DpsUniLen(sentence)) >= Indexer->Flags.SEASentenceMinLength) {
      /* j stays 1 only if this sentence is not already in the list. */
      j = 1;
      for (i = 0; i < List.nitems; i++) {
	if (DpsUniStrCmp(sentence, List.Sent[i].sentence) == 0) {
	  j = 0; break;
	}
      }
      if (j) {
	if ( List.nitems < Indexer->Flags.SEASentences ) {
	  if (List.nitems == List.mitems) {
	    List.mitems += 16;
	    List.Sent = (DPS_SENTENCE*)DpsRealloc(List.Sent, List.mitems * sizeof(DPS_SENTENCE));
	    if (List.Sent == NULL) { TRACE_OUT(Indexer); return DPS_ERROR;}
	  }
	  List.Sent[List.nitems].sentence = DpsUniDup(sentence);
	  List.Sent[List.nitems].len = sent_len;
	  List.Sent[List.nitems].order = order++;
	  /* The language map is built from a lower-cased scratch copy. */
	  sentence = DpsUniDup(sentence);
	  DpsUniStrToLower(sentence);
	  bzero(&List.Sent[List.nitems].LangMap, sizeof(DPS_LANGMAP));
	  DpsBuildLangMap(&List.Sent[List.nitems].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0);
	  if (sent_len < min_len) { min_len = sent_len; min_pos = List.nitems; }
	  List.nitems++;
	  DPS_FREE(sentence);
	} else if (sent_len > min_len) {
	  /* List is full: replace the shortest sentence, then find the new shortest. */
	  DPS_FREE(List.Sent[min_pos].sentence);
	  List.Sent[min_pos].sentence = DpsUniDup(sentence);
	  List.Sent[min_pos].len = sent_len;
	  List.Sent[min_pos].order = order++;
	  sentence = DpsUniDup(sentence);
	  DpsUniStrToLower(sentence);
	  bzero(&List.Sent[min_pos].LangMap, sizeof(DPS_LANGMAP));
	  DpsBuildLangMap(&List.Sent[min_pos].LangMap, (char*)sentence, sent_len * sizeof(dpsunicode_t), 0, 0);
	  DPS_FREE(sentence);
	  min_len = List.Sent[0].len; min_pos = 0;
	  for(i = 1; i < List.nitems; i++) if (List.Sent[i].len < min_len) { min_len = List.Sent[i].len; min_pos = i; }
	}
      }
    }
#ifdef DEBUG
    fprintf(stderr, "Sent. len.:%d, Min.allowed: %d\n", sent_len, Indexer->Flags.SEASentenceMinLength);
#endif
    if (lt != NULL) *lt = savec;
    sentence = DpsUniStrTok_SEA(NULL, &lt);
  }
  DpsLog(Indexer, DPS_LOG_DEBUG, "SEA sentences: %d", List.nitems);
  if (List.nitems < 4) {
    for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence);
    DPS_FREE(List.Sent); 
    TRACE_OUT(Indexer);
    return DPS_OK; 
  }

  /* links is a row-major nitems x nitems matrix: links[i * List.nitems + j]. */
  links = (double*)DpsMalloc(sizeof(double) * List.nitems * List.nitems);
  lang_cs = (double*)DpsMalloc(sizeof(double) * List.nitems);

  if (links != NULL && lang_cs != NULL) {

    for (i = 0; i < List.nitems; i++) {
      DpsPrepareLangMap(&List.Sent[i].LangMap);
    }

    for (i = 0; i < List.nitems; i++) {
      List.Sent[i].Oi =  List.Sent[i].di = 0.5;
      /* Diagonal: sentence versus the document's own language map, if there is one. */
      if (Doc->lang_cs_map == NULL) {
	  links[i * List.nitems + i] = 0.0;
      } else {
	MapStat.map = &List.Sent[i].LangMap;
	DpsCheckLangMap6(Doc->lang_cs_map, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT);
	links[i * List.nitems + i] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1);
      }
#ifdef DEBUG
      DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, i, links[i * List.nitems + i], MapStat.hits, MapStat.miss);
#endif
      /* Off-diagonal: sentence i versus every other sentence j. */
      for (j = 0; j < List.nitems; j++) {
	  if (j == i) continue;
	MapStat.map = &List.Sent[j].LangMap;
	DpsCheckLangMap6(&List.Sent[j].LangMap, &List.Sent[i].LangMap, &MapStat, DPS_LM_TOPCNT * DPS_LM_TOPCNT, 2 * DPS_LM_TOPCNT);

	links[i * List.nitems + j] = (double)MapStat.hits / (2.0 * DPS_LM_TOPCNT) / (List.nitems + 1);
#ifdef DEBUG
	DpsLog(Indexer, DPS_LOG_INFO, "Link %u->%u: %f [hits:%d miss:%d]", i, j, links[i * List.nitems + j], MapStat.hits, MapStat.miss);
#endif
      }
    }

    /* Weight of sentence l: f() of its link row weighted by Oi, clamped to the borders. */
    for (l = 0; l < List.nitems; l++) {
	w = 0.0;
	for (i = 0; i < List.nitems; i++) { 
	    w += links[l * List.nitems + i] * List.Sent[i].Oi;
	}
	w = f(w);
	if (w < LOW_BORDER_EPS2) w = LOW_BORDER_EPS2;
	else if (w > HI_BORDER_EPS2) w = HI_BORDER_EPS2;
	List.Sent[l].di = w;
    }

    DpsSort(List.Sent, List.nitems, sizeof(DPS_SENTENCE), (qsort_cmp)SentCmp);

#ifdef DEBUG
    /* Dump up to the first five sentences; only nitems >= 4 is guaranteed here,
       so the index must be bounded by nitems. */
    for (i = 0; (i < 5) && (i < List.nitems); i++) {
      DpsConv(&Indexer->uni_lc, lcstr, sizeof(lcstr), (char*)List.Sent[i].sentence, sizeof(dpsunicode_t) * (DpsUniLen(List.Sent[i].sentence) + 1));
      fprintf(stderr, "Sent.%u: %f %f -- %s\n", (unsigned)i, List.Sent[i].di, List.Sent[i].Oi, lcstr);
    }
#endif
    /* Never touch more entries than were collected, whatever TOP_SENTENCES is. */
    top = (List.nitems < (size_t)TOP_SENTENCES) ? List.nitems : (size_t)TOP_SENTENCES;
    DpsSort(List.Sent, top, sizeof(DPS_SENTENCE), (qsort_cmp)SentOrderCmp);

    bzero(&Item, sizeof(Item));
    Item.section = seasec;
    Item.href = NULL;
    Item.section_name = "sea";
    for (i = 0; i < top; i++) {
      /* DpsPrepareItem() gets the sentence plus a scratch duplicate of it. */
      dpsunicode_t *UStr = DpsUniDup(List.Sent[i].sentence);
      DpsPrepareItem(Indexer, Doc, &Item, List.Sent[i].sentence, UStr, content_lang, indexed_size, indexed_limit,
		     max_word_len, min_word_len, crossec
#ifdef HAVE_ASPELL
		     , have_speller, speller, NULL
#endif
		     );
      DPS_FREE(UStr);
    }
  }
  DPS_FREE(lang_cs);
  DPS_FREE(links);
  for (i = 0; i < List.nitems; i++) DPS_FREE(List.Sent[i].sentence);
  DPS_FREE(List.Sent);

  TRACE_OUT(Indexer);
  return DPS_OK;
}
示例#5
0
/*
 * Release the storage owned by a boolean stack.
 *
 * Frees the s->cstack and s->astack arrays and, when s->freeme is non-zero,
 * the DPS_BOOLSTACK structure itself.  A NULL argument is accepted and
 * ignored, so callers may pass a stack that was never created.
 */
void DpsBoolStackFree(DPS_BOOLSTACK *s) {
	if (s == NULL) return;
	DPS_FREE(s->cstack);
	DPS_FREE(s->astack);
	if (s->freeme) DPS_FREE(s);
}
示例#6
0
/*
 * Add a cookie to the agent's in-memory cookie list, or update the value of
 * an already known cookie, optionally mirroring the change into the SQL
 * "cookies" table.
 *
 *   Indexer     - agent whose Cookies list is updated
 *   domain      - cookie domain; also hashed to choose the target database
 *   path        - cookie path, NULL is treated as an empty string
 *   name, value - cookie name and value
 *   secure      - secure flag, stored and matched as a single character
 *   expires     - expiration value, written to SQL only
 *   from_config - stored in the new cookie entry
 *   insert_flag - when non-zero the change is also queued to the database
 *
 * A cookie matches an existing one when domain, path and name compare equal
 * case-insensitively and the secure flag is identical.
 *
 * Returns DPS_OK on success (including "no database configured") and
 * DPS_ERROR when the cookie list cannot be enlarged.  Without HAVE_SQL the
 * function does nothing and returns DPS_OK.
 *
 * NOTE(review): only the path is escaped with DpsDBEscStr(); domain, name and
 * value are interpolated into the SQL text as-is.  If these can originate
 * from remote servers they should be escaped as well -- confirm and fix at
 * the call sites or here.
 */
int DpsCookiesAdd(DPS_AGENT *Indexer, const char *domain, const char * path, const char *name, const char *value, const char secure,
		  dps_uint4 expires, const char from_config, int insert_flag) {
#ifdef HAVE_SQL

  char buf[3*PATH_MAX];
  char path_esc[2*PATH_MAX+1];
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  DPS_DB *db;
  dpshash32_t url_id = DpsStrHash32(domain);
  size_t i;
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  /* Pick the database by domain hash; with no database there is nothing to do. */
  if (Indexer->flags & DPS_FLAG_UNOCON) {
    if (Indexer->Conf->dbl.nitems == 0) {
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
    DPS_GETLOCK(Indexer, DPS_LOCK_DB);
    db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
  } else {
    if (Indexer->dbl.nitems == 0) {
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
    db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
  }
  (void)DpsDBEscStr(db, path_esc, DPS_NULL2EMPTY(path), dps_min(PATH_MAX,dps_strlen(DPS_NULL2EMPTY(path))));

  /* Known cookie: replace its value in place. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    if (!strcasecmp(Coo->domain, domain) && !strcasecmp(Coo->path, DPS_NULL2EMPTY(path)) && !strcasecmp(Coo->name, name) && (Coo->secure == secure)/* && (Coo->from_config == from_config)*/ ) {
      DPS_FREE(Coo->value);
      Coo->value = DpsStrdup(value);
/*      Coo->expires = expires;*/
      if (insert_flag) {
	dps_snprintf(buf, sizeof(buf), "UPDATE cookies SET value='%s',expires=%d WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
		     value, expires, domain, path_esc, name, secure);
	DpsSQLAsyncQuery(db, NULL, buf);
      }
      if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
  }

  /* New cookie: grow the list by one entry. */
  Cookies->Cookie = (DPS_COOKIE*)DpsRealloc(Cookies->Cookie, (Cookies->ncookies + 1) * sizeof(DPS_COOKIE));
  if(Cookies->Cookie == NULL) {
    Cookies->ncookies = 0;
    if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
    DpsViolationExit(Indexer->handle, paran);
#endif
    return DPS_ERROR;
  }
  Coo = &Cookies->Cookie[Cookies->ncookies];
/*  Coo->expires = expires;*/
  Coo->secure = secure;
  Coo->from_config = from_config;
  Coo->domain = DpsStrdup(domain);
  /* Store "" rather than NULL so that the strcasecmp() lookup above stays valid. */
  Coo->path = DpsStrdup(DPS_NULL2EMPTY(path));
  Coo->name = DpsStrdup(name);
  Coo->value = DpsStrdup(value);
  if (insert_flag) {
    if (Indexer->Flags.CheckInsertSQL) {
      dps_snprintf(buf, sizeof(buf), "DELETE FROM cookies WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
		   domain, path_esc, name, secure);
      DpsSQLAsyncQuery(db, NULL, buf);
    }
    dps_snprintf(buf, sizeof(buf), "INSERT INTO cookies(expires,secure,domain,path,name,value)VALUES(%d,'%c','%s','%s','%s','%s')",
		 expires, secure, domain, path_esc, name, value);
    DpsSQLAsyncQuery(db, NULL, buf);
  }
  Cookies->ncookies++;
  if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif

#endif /*HAVE_SQL*/
  return DPS_OK;
}
示例#7
0
/*
 * Entry point of the "filler.cgi" front-end.
 *
 * The program runs either under an HTTP server (detected through the
 * QUERY_STRING / REQUEST_METHOD environment variables) or from the command
 * line, where argv[1] is used as the query.  It
 *   1. creates the environment and the agent,
 *   2. works out the template file name from DPSEARCH_TEMPLATE, PATH_INFO,
 *      the "tmplt" query variable, or the script name,
 *   3. loads the template (falling back to "filler.htm" in the configuration
 *      directory),
 *   4. publishes query-tracking variables (QUERY_STRING, self, IP) and the
 *      charsets, and
 *   5. prints the "top", "restop", "res", "resbot" and "bottom" template
 *      sections to stdout.
 *
 * Returns 0 / DPS_OK; fatal set-up errors print a message and exit(0).
 */
int main(int argc, char ** argv, char **envp) {
	const char	*env, *bcharset, *lcharset, *conf_dir;
	char		template_name[PATH_MAX+6]="";
	char            *template_filename = NULL;
	char		*query_string = NULL;
	char		self[1024]="";
	char		*url = NULL;
	const char      *ResultContentType;
	int		res,httpd=0;
	size_t          catcolumns = 0;
	int		page_size,page_number;
	DPS_ENV		*Env;
	DPS_AGENT	*Agent;
	DPS_VARLIST	query_vars;
	
	/* Output Content-type if under HTTPD	 */
	/* Some servers do not pass QUERY_STRING */
	/* if the query was empty, so check	 */
	/* REQUEST_METHOD too     to be safe     */
	
	httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD"));
	if (!(conf_dir=getenv("DPS_ETC_DIR")))
		conf_dir=DPS_CONF_DIR;
	
	
	DpsInit(argc, argv, envp);
	Env=DpsEnvInit(NULL);
	if (Env == NULL) {
	  if(httpd){
	    printf("Content-Type: text/plain\r\n\r\n");
	  }
	  printf("Can't alloc Env\n");
	  exit(0);
	}
	DpsVarListInit(&query_vars);
	Agent = DpsAgentInit(NULL, Env, 0);
	if (Agent == NULL) {
	  if(httpd){
	    printf("Content-Type: text/plain\r\n\r\n");
	  }
	  printf("Can't alloc Agent\n");
	  exit(0);
	}
	DpsVarListAddEnviron(&Env->Vars,"ENV");
	
	/* Detect self and template name */
	if((env = getenv("DPSEARCH_TEMPLATE")))
		dps_strncpy(template_name, env, sizeof(template_name) - 1);
	else if((env = getenv("PATH_INFO")) && env[0])
		dps_strncpy(template_name, env + 1, sizeof(template_name) - 1);
	
	if((env=getenv("DPSEARCH_SELF")))
		dps_strncpy(self,env,sizeof(self)-1);
	
	if((env=getenv("QUERY_STRING"))){
	        query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2);
		if (query_string == NULL) {
		  if(httpd){
		    printf("Content-Type: text/plain\r\n\r\n");
		  }
		  printf("Can't alloc query_string\n");
		  exit(0);
		}
		dps_strncpy(query_string, env, dps_strlen(env) + 1);

		/* Hack for Russian Apache from apache.lexa.ru  */
		/* QUERY_STRING is already converted to server  */
		/* character set. We must print original query  */
		/* string instead however. Under usual apache   */ 
		/* we'll use QUERY_STRING. Note that query_vars */
		/* list will contain not unescaped values, so   */
		/* we don't have to escape them when displaying */
		env = getenv("CHARSET_SAVED_QUERY_STRING");
		DpsParseQStringUnescaped(&query_vars,env?env:query_string);
	
		/* Unescape and save variables from QUERY_STRING */
		/* Env->Vars will have unescaped values however  */
		DpsParseQueryString(Agent,&Env->Vars,query_string);
	
		template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", ""));
	
		if((env=getenv("REDIRECT_STATUS"))){

			/* Check Apache internal redirect  */
			/* via   "AddHandler" and "Action" */
			if(!self[0]){
				dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1);
			}
			if(!template_name[0]){
				dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1);
			}
			if (*template_filename == '\0') { 
			  DPS_FREE(template_filename); 
			  template_filename = (char*)DpsStrdup("filler.htm"); 
			}
		}else{
			/* CGI executed without Apache internal redirect */

			/* Detect $Self variable with OS independant SLASHES */
			if(!self[0]){
				dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1);
			}

			if(!template_name[0]){
				char *s,*e;
				
				/*This is with OS specific SLASHES */
				env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi");

				if(strcmp(conf_dir,".")){
					/* Take from the config directory */
					dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", 
						     conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self));
				}else{
					/* Take from the current directory */
					dps_strncpy(template_name,env,sizeof(template_name)-1);
				}

				/* Find right slash if it presents */
				s=((s=strrchr(template_name,DPSSLASH))?s:template_name);

				if (*template_filename == '\0') {

				  /* Find .cgi substring */
				  if ((e = strstr(s, ".cgi")) != NULL) {
					/* Replace ".cgi" with ".htm" */
					e[1]='h';e[2]='t';e[3]='m';
				  } else {
				        dps_strcat(s, ".htm");
				  }
				  /* s may contain no '/' at all (bare file name, or a   */
				  /* non-'/' DPSSLASH); use the whole of s in that case  */
				  /* instead of dereferencing NULL + 1.                  */
				  e = strrchr(s, '/');
				  DPS_FREE(template_filename);
				  template_filename = (char*)DpsStrdup((e != NULL) ? (e + 1) : s);
				} else {
				  dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 2);
				}
			}
		}
	}else{
		/* Executed from command line     */
		/* or under server which does not */
		/* pass an empty QUERY_STRING var */
		if(argv[1]) {
		  query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10);
		  if (query_string == NULL) {
		    if(httpd){
		      printf("Content-Type: text/plain\r\n\r\n");
		    }
		    printf("Can't realloc query_string\n");
		    exit(0);
		  }
		  sprintf(query_string, "q=%s", argv[1]);
		} else {
		  query_string = (char*)DpsRealloc(query_string, 1024);
		  if (query_string == NULL) {
		    if(httpd){
		      printf("Content-Type: text/plain\r\n\r\n");
		    }
		    printf("Can't realloc query_string\n");
		    exit(0);
		  }
		  sprintf(query_string, "q=");
		}

		/* Hack for Russian Apache from apache.lexa.ru  */
		/* QUERY_STRING is already converted to server  */
		/* character set. We must print original query  */
		/* string instead however. Under usual apache   */ 
		/* we'll use QUERY_STRING. Note that query_vars */
		/* list will contain not unescaped values, so   */
		/* we don't have to escape them when displaying */
		env = getenv("CHARSET_SAVED_QUERY_STRING");
		DpsParseQStringUnescaped(&query_vars,env?env:query_string);
	
		/* Unescape and save variables from QUERY_STRING */
		/* Env->Vars will have unescaped values however  */
		DpsParseQueryString(Agent,&Env->Vars,query_string);

		DPS_FREE(template_filename);
		template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", ""));
		if (*template_filename == '\0') { 
		  DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); 
		}
	
		/*// Get template name from command line variable &tmplt */
		if(!template_name[0])
			dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename);
	}
	
	DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename);
	DPS_FREE(template_filename);

	Agent->tmpl.Env_Vars = &Env->Vars;
	
	DpsURLNormalizePath(template_name);
	
	/* Only templates located under conf_dir are accepted; anything else, */
	/* or a load failure, falls back to the default template.             */
	if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) 
	    || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) {
	  if (strcmp(template_name, "filler.htm")) { /* trying load default template */
	    fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr);
	    DPS_FREE(template_filename);
	    template_filename = (char*)DpsStrdup("filler.htm");
	    dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename);

	    if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) {

		if(httpd)printf("Content-Type: text/plain\r\n\r\n");
		printf("%s\n",Env->errstr);
		DpsVarListFree(&query_vars);
		DpsEnvFree(Env);
		DPS_FREE(query_string);
		DPS_FREE(template_filename);
		DpsAgentFree(Agent);
		return(0);
	    }
	  } else {
		if(httpd)printf("Content-Type: text/plain\r\n\r\n");
		printf("%s\n",Env->errstr);
		DpsVarListFree(&query_vars);
		DpsEnvFree(Env);
		DPS_FREE(query_string);
		DpsAgentFree(Agent);
		return(0);
	  }
	}

	/* set locale if specified */
	if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) {
	  setlocale(LC_ALL, url);
/*#ifdef HAVE_ASPELL*/
	  /* Publish the part of the locale before '.' as "g-lc"; the dot is */
	  /* temporarily overwritten and then restored.                      */
	  { char *p;
	    if ((p = strchr(url, '.')) != NULL) {
	      *p = '\0';
	      DpsVarListReplaceStr(&Env->Vars, "g-lc", url);
	      *p = '.';
	    }
	  }
/*#endif*/
	  url = NULL;
	}
	
	/* Call again to load search Limits if need */
	DpsParseQueryString(Agent, &Env->Vars, query_string);
	Agent->Flags = Env->Flags;
	Agent->flags |= DPS_FLAG_UNOCON;
	Env->flags |= DPS_FLAG_UNOCON;
	DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0));
	DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? "yes" : "no"), "yes"));
	DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name);
	DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR));
	DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d",
	       Env->Affixes.naffixes,Env->Spells.nspell,
	       Env->Synonyms.nsynonyms,
	       Env->Acronyms.nacronyms,
	       Env->StopWords.nstopwords);
	DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords);
	DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords);
	DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords);
	DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*");
	Agent->tmpl.Env_Vars = &Agent->Vars;
/*	DpsVarListAddEnviron(&Agent->Vars, "ENV");*/
/****************************************************************************************************************************************/
	/* This is for query tracking */
	DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string);
	DpsVarListAddStr(&Agent->Vars, "self", self);
	/* The X-Forwarded-For request header reaches a CGI program as      */
	/* HTTP_X_FORWARDED_FOR; the historical misspelling is still        */
	/* consulted afterwards so existing set-ups keep working.           */
	env = getenv("HTTP_X_FORWARDED_FOR");
	if (env == NULL) env = getenv("HTTP_X_FORWARDER_FOR");
	if (env) {
	  DpsVarListAddStr(&Agent->Vars, "IP", env);
	} else {
	  env = getenv("REMOTE_ADDR");
	  DpsVarListAddStr(&Agent->Vars, "IP", env ? env : "localhost");
	}
	
	bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1");
	Env->bcs=DpsGetCharSet(bcharset);
	lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1");
	Env->lcs=DpsGetCharSet(lcharset);

	ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html");
	
	if(httpd){
		if(!Env->bcs){
			printf("Content-Type: text/plain\r\n\r\n");
			printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name);
			exit(0);
		}else if(!Env->lcs){
			printf("Content-Type: text/plain\r\n\r\n");
			printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name);
			exit(0);
		}else{
		  printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset);
		}
	}else{
		if(!Env->bcs){
			printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name);
			exit(0);
		}
		if(!Env->lcs){
			printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name);
			exit(0);
		}
	}
	
	/* These parameters taken from "variable section of template"*/
	
	res         = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS);
	page_size   = dps_min(res, MAX_PS);
	page_number = DpsVarListFindInt(&Agent->Vars, "p", 0);
	if (page_number == 0) {
	  page_number = DpsVarListFindInt(&Agent->Vars, "np", 0);
	  DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1);
	} else page_number--;
	
	res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size;
	DpsVarListAddInt(&Agent->Vars, "pn", res);
	
	catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", ""));


	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top");

	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop");
	
	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res");

	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot");
	
	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom");
	
	DpsVarListFree(&query_vars);
	DpsAgentFree(Agent);
	DpsEnvFree(Env);
	DPS_FREE(query_string);
	/* Non-NULL only when the default-template fallback re-allocated it. */
	DPS_FREE(template_filename);
	DPS_FREE(url);
	if (httpd) fflush(NULL); else fclose(stdout);
	
#ifdef EFENCE
	fprintf(stderr, "Memory leaks checking\n");
	DpsEfenceCheckLeaks();
#endif
#ifdef FILENCE
	fprintf(stderr, "FD leaks checking\n");
	DpsFilenceCheckLeaks(NULL);
#endif

	return DPS_OK;
}
示例#8
0
/*
 * Defragment one base file pair (index ".i" / data ".s"), or all of them.
 *
 *   P     - base parameters; P->mode, P->rec_id, P->Item and
 *           P->CurrentItemPos are overwritten while working
 *   sbase - number of the base file to process, or -1 to process every file
 *           from 0 up to P->NFiles
 *
 * For every selected base the index is scanned to measure how much of the
 * data file is still referenced.  When the unused share reaches the
 * "OptimizeRatio" variable (default 15, in percent), or the data file is
 * non-empty but nothing references it, the live records are moved towards
 * the start of the data file, the data file is truncated, and the index file
 * is rebuilt from scratch.
 *
 * Returns DPS_OK, DPS_ERROR on I/O or allocation failure, or the error code
 * reported by DpsBaseSeek().
 */
extern __C_LINK int __DPSCALL DpsBaseOptimize(DPS_BASE_PARAM *P, int sbase) {
  struct	stat sb;
  urlid_t base, base_from, base_to;
  long unsigned ActualSize, OriginalSize, i, nitems;
  off_t pos, posold, NewItemPos, SSize;
  dps_uint8 diff, gain;
  double dr = 0.0, cr = 0.0;
  ssize_t nread; size_t rsize;
  ssize_t wr;
  int OptimizeRatio, res, error_cnt;
  char buffer[BUFSIZ];
  DPS_BASEITEM *hTable;
  DPS_SORTBASEITEM *si = NULL;

  OptimizeRatio = DpsVarListFindInt(&P->A->Vars, "OptimizeRatio", 15);

  P->mode = DPS_WRITE_LOCK;
  if (sbase == -1) {
    base_from = 0; base_to = (urlid_t)P->NFiles;
  } else {
    base_from = sbase; base_to = sbase + 1;
  }

  for (base = base_from; base < base_to; base++) {

    error_cnt = 0;
    gain = (dps_uint8)0;
    P->rec_id = ((base & DPS_BASE_MASK) << DPS_BASE_BITS);
    if (DpsBaseOpen(P, DPS_WRITE_LOCK) != DPS_OK) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open base %s/%s {%s:%d}", P->subdir, P->basename, __FILE__, __LINE__);
      DpsBaseClose(P);
      return DPS_ERROR;
    }
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
      DpsBaseClose(P);
      return DPS_ERROR;
    }

    /* SSize = current size of the data file (fstat, or seek-to-end as fallback). */
    if (fstat(P->Sfd, &sb) == 0) {
      SSize = sb.st_size;
    } else {
      if ((SSize = (off_t)lseek(P->Sfd, (off_t)0, SEEK_END)) == (off_t)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
	DpsBaseClose(P);
	return DPS_ERROR;
      }
    }

    /* Pass 1: count index entries and total the stored / original sizes of the valid ones. */
    nitems = 0;
    ActualSize = 0;
    OriginalSize = 0;
    while(read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      nitems++;
      if ((P->Item.rec_id != 0) && ((dps_uint8)P->Item.offset < (dps_uint8)SSize) && (P->Item.size > 0)) {
	ActualSize += (long unsigned)P->Item.size;
	OriginalSize += (long unsigned)(P->Item.orig_size ? P->Item.orig_size : P->Item.size);
      }
    }
    /* Cut a trailing partial entry off the index file. */
    if (ftruncate(P->Ifd, (off_t)(nitems * sizeof(DPS_BASEITEM))) != 0) {
	dps_strerror(P->A, DPS_LOG_EXTRA, "ftruncate error (pos:%ld) [%s:%d]", (off_t)(nitems * sizeof(DPS_BASEITEM)), __FILE__, __LINE__);
    }

    /* dr: percentage of the data file not covered by valid records; cr: stored/original size percentage. */
    dr = (nitems) ? fabs(100.0 * ((long unsigned)SSize - ActualSize) / ((double)SSize + 1.0)) : 0.0;
    cr = (nitems) ? fabs(100.0 * ActualSize / (OriginalSize + 1)) : 0.0;

    DpsLog(P->A, DPS_LOG_EXTRA, "Optimize: %s/%s base 0x%X, %ld recs defrag: %.2f%% Ratio: %.2f%% Data: %ld File: %ld", 
	   P->subdir, P->basename, P->FileNo, nitems, dr, cr,  ActualSize, (long)SSize);

    if ((dr >= (double)OptimizeRatio) || (ActualSize == 0 && SSize != 0)) {

      si = (DPS_SORTBASEITEM*)DpsMalloc((nitems + 1) * sizeof(DPS_SORTBASEITEM));

      if (si == NULL) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't alloc si (%d bytes) at {%s:%d}", (nitems + 1) * sizeof(DPS_SORTBASEITEM), __FILE__, __LINE__);
	DpsBaseClose(P);
	return DPS_ERROR;
      }
      if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
	DpsBaseClose(P);
	DPS_FREE(si);
	return DPS_ERROR;
      }

      /* Pass 2: load the index into si[]; i advances only for valid entries, so invalid ones are overwritten. */
      for (i = 0; (i < nitems) && (read(P->Ifd, &si[i].Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)); ) {
	if(si[i].Item.rec_id != 0 && ((dps_uint8)si[i].Item.offset < (dps_uint8)SSize) && (si[i].Item.size > 0) && (si[i].Item.size < ActualSize) ) {
	  i++;
	}
      }

      /* NOTE(review): cmpsi is defined elsewhere; the compaction below presumably relies on it ordering by data-file offset -- confirm. */
      if (i < nitems) nitems = i;
      if (nitems > 1) DpsSort((void*)si, (size_t)nitems, sizeof(DPS_SORTBASEITEM), cmpsi);

      gain = (dps_uint8)0;
      pos = (off_t)0;
      posold = (off_t)0;
      if (nitems > 0) {
	if ((long unsigned)si[0].Item.offset < (long unsigned)SSize) {
	  posold = (off_t)si[0].Item.offset;
	} else {
	  si[0].Item.offset = (off_t)0;
	  si[0].Item.size = 0;
	}
      }
      /* A record may not extend past the start of the next one; clamp and count it as an error. */
      if (nitems > 1) {
	if (si[0].Item.size > (rsize = (size_t)(si[1].Item.offset - si[0].Item.offset))) {
	  DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by offset: %ld -> %ld", (long)si[0].Item.size, (long)rsize);
	  si[0].Item.size = rsize;
	  error_cnt++;
	}
      }
      /* Move the first record to offset 0 when it does not already start there, copying through buffer in BUFSIZ chunks. */
      if ((diff = (dps_uint8)posold) > 0) {
	for(
	    lseek(P->Sfd, posold, SEEK_SET), rsize = 0;
	    (rsize < si[0].Item.size) && ((nread = read(P->Sfd, buffer, 
							(rsize + BUFSIZ < si[0].Item.size) ? BUFSIZ : (si[0].Item.size - rsize) )) > 0);
	    lseek(P->Sfd, posold, SEEK_SET)
	    ) {
	  lseek(P->Sfd, pos, SEEK_SET);
	  (void)write(P->Sfd, buffer, (size_t)nread);
	  rsize += (size_t)nread;
	  posold += (off_t)nread;
	  pos += (off_t)nread;
	}
	si[0].Item.offset = 0;
	if (rsize != si[0].Item.size) {
	  DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by size: %ld -> %ld", (long)si[0].Item.size, (long)rsize);
	  si[0].Item.size = rsize;
	  error_cnt++;
	}
	gain += diff;
      }
      
      /* Close the gap between each record i and its successor i + 1. */
      if (nitems > 0)
      for (i = 0; i < nitems - 1; i++) {
	if ((long unsigned)si[i + 1].Item.offset > (long unsigned)SSize) {
	  DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", i , (long)si[i + 1].Item.offset, (long)SSize);
	  si[i + 1].Item.size = 0;
	  si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size;
	  error_cnt++;
	} else {
	  /* pos: where record i + 1 should start; posold: where it currently starts. */
	  pos = (off_t)(si[i].Item.offset + si[i].Item.size);
	  posold = (off_t)si[i + 1].Item.offset;
	  if (i < nitems - 2) {
	    if (si[i + 1].Item.size > (rsize = (size_t)(si[i + 2].Item.offset - si[i + 1].Item.offset))) {
	      DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by offset: %ld -> %ld", i + 1, (long)si[i + 1].Item.size, (long)rsize );
	      si[i + 1].Item.size = rsize;
	      error_cnt++;
	    }
	  }
	  if ((diff = (dps_uint8)posold - (dps_uint8)pos) > 0) {
	    for(
		lseek(P->Sfd, posold, SEEK_SET), rsize = 0;
		(rsize < si[i + 1].Item.size) && ((nread = read(P->Sfd, buffer,
					      (rsize + BUFSIZ < si[i + 1].Item.size) ? BUFSIZ : (si[i + 1].Item.size - rsize) )) > 0);
		lseek(P->Sfd, posold, SEEK_SET)
		) {
	      lseek(P->Sfd, pos, SEEK_SET);
	      (void)write(P->Sfd, buffer, (size_t)nread);
	      rsize += (size_t)nread;
	      posold += (off_t)nread;
	      pos += (off_t)nread;
	    }
	    if (rsize != si[i + 1].Item.size) {
	      DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by size: %ld -> %ld", i + 1, (long)si[i + 1].Item.size, (long)rsize);
	      si[i + 1].Item.size = rsize;
	      error_cnt++;
	    }
	    si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size;
	    gain += diff;
	  }
	}
      }
      /* Truncate the data file right after the last record. */
      posold = SSize;
      pos = (nitems) ? (off_t)(si[nitems - 1].Item.offset + si[nitems - 1].Item.size) : (off_t)0;
      if (ftruncate(P->Sfd, (off_t)(pos)) != 0) {
	dps_strerror(P->A, DPS_LOG_ERROR, "ftruncate error (pos:%ld) [%s:%d]", pos, __FILE__, __LINE__);
      }
      SSize = pos;

      if (posold > pos) {
	gain += ((dps_uint8)posold - (dps_uint8)pos);
      }

      /* Rebuild the index file unconditionally (the original condition is commented out). */
      /*if (gain != 0 || OptimizeRatio == 0 || error_cnt > 0)*/ {

	posold = lseek(P->Ifd, (off_t)0, SEEK_END);
	(void)ftruncate(P->Ifd, (off_t)0);
	lseek(P->Ifd, (off_t)0, SEEK_SET);

	/* Write a fresh table of DPS_HASH_PRIME entries; NOTE(review): DpsXmalloc presumably returns zero-filled memory -- confirm. */
	if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) {
	  DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", sizeof(DPS_BASEITEM) * DPS_HASH_PRIME);
	  DpsBaseClose(P);
	  DPS_FREE(si);
	  return DPS_ERROR;
	}
	if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) {
	  dps_strerror(P->A, DPS_LOG_ERROR, "[%s:%d] Can't set new index for file %s\nwritten %d bytes of %d",
		 __FILE__, __LINE__, P->Ifilename, wr, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME);
	  DPS_FREE(hTable);
	  DpsBaseClose(P);
	  DPS_FREE(si);
	  return DPS_ERROR;
	}
	DPS_FREE(hTable);

	/* Re-insert every surviving record into the new index. */
	for (i = 0; i < nitems; i++) {
	  if (si[i].Item.rec_id == 0 || si[i].Item.size == 0) continue;
	  if ((long)si[i].Item.offset > (long)SSize) {
	    DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", i , (long)si[i].Item.offset, (long)SSize);
	    error_cnt++;
	    continue;
	  }
	  P->rec_id = si[i].Item.rec_id;
	  if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) {
	    DpsBaseClose(P);
	    DPS_FREE(si);
	    return res;
	  }
	  if (P->Item.rec_id != P->rec_id) {
	    /* Slot taken by another record: link that record to a new entry appended at the end of the index. */
	    if (P->mishash && P->Item.rec_id != 0) {
	      if ((P->Item.next = (dps_uint8)(NewItemPos = lseek(P->Ifd, (off_t)0, SEEK_END))) == (dps_uint8)-1) {
		DpsBaseClose(P);
		DPS_FREE(si);
		return DPS_ERROR;
	      }
	      if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
		DpsBaseClose(P);
		DPS_FREE(si);
		return DPS_ERROR;
	      }
	      if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
		DpsBaseClose(P);
		DPS_FREE(si);
		return DPS_ERROR;
	      }
	      P->CurrentItemPos = (dps_uint8)NewItemPos;
	    }
	  }
	  P->Item = si[i].Item;
	  P->Item.next = (off_t)0;
	  if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
	    DpsBaseClose(P);
	    DPS_FREE(si);
	    return DPS_ERROR;
	  }
	  if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
	    DpsBaseClose(P);
	    DPS_FREE(si);
	    return DPS_ERROR;
	  }
	}
	/* Add the change in index-file size to the reported gain. */
	pos = lseek(P->Ifd, (off_t)0, SEEK_END);
	gain += ((dps_uint8)posold - (dps_uint8)pos);

	DpsLog(P->A, DPS_LOG_DEBUG, "Optimize: %s/%s base 0x%X cleaned, %ld bytes freed", P->subdir, P->basename, base, gain);
      }

      DPS_FREE(si);
    }

    /* Any correction made above steps the loop counter back so this base is processed again. */
    if (error_cnt) base--;
    DpsBaseClose(P);
  }
  return DPS_OK;
}
示例#9
0
/*
 * Write `num` values from `data` to `f` using the CARRY_* / WORD_ENCODE
 * encoder macros.
 *
 * The values are not stored directly: each element is replaced by its
 * difference from the previous element (the first one by its difference
 * from 0) and the differences are encoded in blocks of at most
 * ELEMS_PER_BLOCK elements.
 * NOTE(review): this looks like it expects `data` to be sorted ascending so
 * the gaps stay small -- confirm with the callers.
 *
 * Returns DPS_OK, or DPS_ERROR when a work buffer cannot be allocated.
 */
int DpsCarryLimitWrite(DPS_AGENT *Indexer, FILE *f, urlid_t *data, size_t num) {
  unsigned char *bits;
  urlid_t curr = 0;
  urlid_t *a;
  size_t i, n, z;
  int j;
  size_t avail, elems;
  unsigned size, max_bits;
  unsigned char *table, *base;
  unsigned global_max, global_sum, global_n;

  /* allocating mem for bits[i] - minimal bits needed to code a[i] */
  if (! (bits = (unsigned char*)malloc(ELEMS_PER_BLOCK * sizeof(unsigned char)))) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Out of memory [%s:%d]", __FILE__, __LINE__);
    return DPS_ERROR;
  }
  /* a[] holds the gaps of the block currently being encoded */
  if (! (a = (urlid_t*) malloc(ELEMS_PER_BLOCK * sizeof(urlid_t)))) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Out of memory [%s:%d]", __FILE__, __LINE__);
    DPS_FREE(bits);
    return DPS_ERROR;
  }

  CARRY_ENCODE_START(f);
  size = TRANS_TABLE_STARTER;
  global_max = global_sum = global_n = 0;

  for (z = 0; z < num; ) {

    /* Fill the next block with gaps; z advances through data, n counts the block's elements. */
    for (n = 0; (n < ELEMS_PER_BLOCK) && (z < num); n++,z++) {
      a[n] = data[z] - curr;
      curr = data[z];
    }

    max_bits = CalcMinBits(a, bits, n, &global_max, &global_sum, &global_n);
    CARRY_BLOCK_ENCODE_START(n, max_bits);
    for (i=0; i<n; )
    {
      avail = GET_AVAILABLE_BITS;
      table = GET_TRANS_TABLE(avail);
      base= table+(size<<2);       /* row in trans table */
    
      /* 1. Modeling: Find j= the first-fit column in base */	
      for (j=0; j<4; j++)
      {
        size = base[j];
        if (size >avail) 		/* must use next word for data  */
        {
	  /* restart the column search (j becomes 0 after the increment) with a full 32-bit word */
	  avail=32;
	  j=-1;
	  continue;
        }
        /* assignment is intentional: stop at the first column that codes at least one element */
        if ( elems=elems_coded(avail,size,bits,i,n-1) )
          break;
      }
 
      /* 2. Coding: Code elements using row "base" & column "j" */
      WORD_ENCODE(j+1,2);             /* encoding column */
      for ( ; elems ; elems--, i++)   /* encoding d-gaps */
        WORD_ENCODE(a[i],size);
    }

  }
  CARRY_ENCODE_END;

  DPS_FREE(a);
  DPS_FREE(bits);

  return DPS_OK;
}
示例#10
0
/*
 * Read the record P->rec_id from the base into a newly allocated buffer.
 *
 *   P   - base parameters; P->rec_id selects the record, P->Item is filled
 *         in by DpsBaseSeek()
 *   len - out: number of payload bytes in the returned buffer, 0 on failure
 *
 * When built with zlib, P->zlib_method is Z_DEFLATED and the item carries a
 * non-zero orig_size, the stored bytes are inflated; otherwise they are
 * returned as stored.  The buffer is allocated one byte larger than the
 * payload and that extra byte is set to NUL, so textual records can be used
 * as C strings.
 *
 * Returns the buffer (the caller releases it), or NULL with *len == 0 when
 * the record is not found, cannot be read, or memory is exhausted.  A
 * compressed record that cannot be read in full is deleted from the base.
 */
__C_LINK void * __DPSCALL DpsBaseARead(DPS_BASE_PARAM *P, size_t *len) {
  int res = DPS_OK;
  char *buf = NULL;
#ifdef HAVE_ZLIB
  Byte *CDoc = NULL;
  z_stream zstream;
#endif

  if ((res = DpsBaseSeek(P, DPS_READ_LOCK)) != DPS_OK) {
    *len = 0;
    return NULL;
  }

  if (P->Item.rec_id != P->rec_id) {
    DpsLog(P->A, DPS_LOG_DEBUG, "%s:[%s/%s] Not found rec_id: %x", P->vardir, P->subdir, P->basename, P->rec_id);
    *len = 0;
    return NULL;
  }

  if (lseek(P->Sfd, (off_t)P->Item.offset, SEEK_SET) == (off_t)-1) {
    *len = 0;
    return NULL;
  }

#ifdef HAVE_ZLIB

  bzero(&zstream, sizeof(zstream));

  if ((P->zlib_method == Z_DEFLATED) && (P->Item.orig_size != 0)) {
    zstream.avail_in = (uInt)P->Item.size;
    /* Output room: original size plus twice the stored size as slack. */
    *len = zstream.avail_out = (uInt)(2 * P->Item.size + P->Item.orig_size);
    CDoc = zstream.next_in = (Byte *) DpsMalloc(P->Item.size + 1);
    if (CDoc == NULL) {
      *len = 0;
      return NULL;
    }
    if ((buf = (char*)DpsMalloc(*len + 1)) == NULL) {
      DPS_FREE(CDoc);
      *len = 0;
      return NULL;
    }
    zstream.next_out = (Byte *) buf;
    zstream.zalloc = Z_NULL;
    zstream.zfree = Z_NULL;
    zstream.opaque = Z_NULL;
    if (read(P->Sfd, CDoc, P->Item.size) != (ssize_t)P->Item.size) {
      DpsLog(P->A, DPS_LOG_ERROR, "[%s/%s] %d read error, rec_id: %x, deleting... -- %d",  P->subdir, P->basename, P->Item.size, P->rec_id, __LINE__);
      DpsBaseDelete(P);
      DPS_FREE(buf);
      DPS_FREE(CDoc);
      /* Report "no data" like every other failure path. */
      *len = 0;
      return NULL;
    }
    inflateInit2(&zstream, P->zlib_windowBits);
    res = inflate(&zstream, Z_FINISH);
    /* total_out never exceeds the avail_out set above, so buf[*len] below stays in bounds. */
    *len = zstream.total_out;
    inflateEnd(&zstream);
    DPS_FREE(CDoc);

  } else 
#endif
    {
      if ((buf = (char*)DpsMalloc((*len = P->Item.size) + 1)) == NULL) {
	*len = 0;
	return NULL;
      }
      if (read(P->Sfd, buf, P->Item.size) != (ssize_t)P->Item.size) {
	DpsFree(buf);
	*len = 0;
	return NULL;
      }
    }

  /* NUL-terminate in the spare byte allocated above. */
  buf[*len] = '\0';
#ifdef DEBUG_SEARCH
  DpsLog(P->A, DPS_LOG_DEBUG, "[%s/%s] ARetrieved rec_id: %x Size: %d->%d", P->subdir, P->basename, P->rec_id, P->Item.size, P->Item.orig_size);
#endif
  return buf;
}
示例#11
0
/*
 * Remove records from the base that the caller no longer recognises.
 *
 *   P        - base parameters; P->rec_id and P->Item are overwritten
 *   checkrec - callback invoked as checkrec(P->A, rec_id) for every non-empty
 *              index entry; a return value of 0 marks the record for deletion
 *
 * Every base file from 0 up to P->NFiles is opened for reading and its index
 * is scanned; the ids to remove are collected first and deleted with
 * DpsBaseDelete() only after the scan of that file has finished.  Base files
 * that cannot be opened are skipped.
 *
 * The scan stops early, returning DPS_OK, when one of the have_sigterm /
 * have_sigint / have_sigalrm flags is set.  Returns DPS_ERROR on allocation
 * or seek failure, DPS_OK otherwise.
 */
__C_LINK int __DPSCALL DpsBaseCheckup(DPS_BASE_PARAM *P, int (*checkrec) (DPS_AGENT *A, const urlid_t rec_id)) {
  int found;
  urlid_t i;
  size_t z;
  /* List of ids to delete for the current file; grown in steps of 128. */
  urlid_t *todel = (urlid_t*)DpsMalloc(128 * sizeof(urlid_t));
  size_t ndel = 0, mdel = 128, totaldel = 0;

  if (todel == NULL) return DPS_ERROR;

  for (i = 0; i < (urlid_t)P->NFiles; i++) {

    if (have_sigterm || have_sigint || have_sigalrm) {
      DpsLog(P->A, DPS_LOG_EXTRA, "%s signal received. Exiting chackup", (have_sigterm) ? "SIGTERM" :
	     (have_sigint) ? "SIGINT" : "SIGALRM");
      DpsBaseClose(P);
      DPS_FREE(todel);
      return DPS_OK;
    }
    P->rec_id = i << DPS_BASE_BITS;
    if (DpsBaseOpen(P, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(P);
      continue;
    }
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
      DpsBaseClose(P);
      DPS_FREE(todel);
      return DPS_ERROR;
    }
    /* Collect the ids rejected by checkrec(). */
    while (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      if (P->Item.rec_id != 0) {
	found = checkrec(P->A, P->Item.rec_id);
	if (found == 0) {
	  if (ndel >= mdel) {
	    mdel += 128;
	    todel = (urlid_t*)DpsRealloc(todel, mdel * sizeof(urlid_t));
	    if (todel == NULL) {
	      DpsBaseClose(P);
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't realloc %d bytes %s:%d", mdel * sizeof(urlid_t),  __FILE__, __LINE__);
	      DPS_FREE(todel);
	      return DPS_ERROR;
	    }
	  }
	  todel[ndel++] = P->Item.rec_id;
	}
      }
    }
    /* Close the read-locked base before deleting from it. */
    DpsBaseClose(P);
    for (z = 0; z < ndel; z++) {
      DpsLog(P->A, DPS_LOG_DEBUG, "Base %s/%s store %03X: deleting url_id: %X", P->subdir, P->basename, i, todel[z]);
      P->rec_id = todel[z];
      DpsBaseDelete(P);
    }
    DpsBaseClose(P);
    DpsLog(P->A, DPS_LOG_INFO, "Base %s/%s store %03X, %d lost records deleted", P->subdir, P->basename, i, ndel);
    totaldel += ndel;
    ndel = 0;
  }
  DPS_FREE(todel);
  DpsLog(P->A, DPS_LOG_EXTRA, "Total lost record(s) deleted: %d\n", totaldel);
  return DPS_OK;
}
示例#12
0
/*
 * DpsBaseOpen - open the index (".i") and storage (".s") files of the base
 * selected by P->rec_id and position P->Item on the index record for it.
 *
 * P    - base descriptor; rec_id, vardir, subdir, basename, indname and A
 *        are read here. If P is already opened it is closed first.
 * mode - DPS_READ_LOCK opens both files O_RDONLY and never creates them;
 *        any other mode opens O_RDWR and creates missing files.
 *
 * On success returns DPS_OK with P->opened = 1, P->mode = mode, and
 * P->Item / P->CurrentItemPos / P->PreviousItemPos / P->mishash describing
 * the result of the hash lookup. On failure returns DPS_ERROR after freeing
 * P->Ifilename and P->Sfilename.
 *
 * NOTE(review): error paths taken after Ifd/Sfd were opened (and possibly
 * locked) do not close or unlock them here; the callers visible in this
 * file call DpsBaseClose() after a failed open - confirm DpsBaseClose
 * handles that state.
 */
__C_LINK int __DPSCALL DpsBaseOpen(DPS_BASE_PARAM *P, int mode) {
  unsigned int hash;
  size_t filenamelen, z;
  ssize_t wr;
  DPS_BASEITEM  *hTable;
#ifdef DEBUG_SEARCH
  unsigned long total_ticks, stop_ticks, start_ticks = DpsStartTimer();
#endif

  TRACE_IN(P->A, "DpsBaseOpen");

  if (P->opened) DpsBaseClose(P);

  /* Lazily take the number of base files from the "BaseFiles" variable (default 0x100). */
  if (P->NFiles == 0) P->NFiles = DpsVarListFindUnsigned(&P->A->Vars, "BaseFiles", 0x100);
  P->FileNo =  DPS_FILENO(P->rec_id, P->NFiles);

  hash = DPS_HASH(P->rec_id);
  /* 48 extra bytes cover the separators, the 4-digit hex file number, the suffix and the terminator. */
  filenamelen = dps_strlen(P->vardir) + dps_strlen(P->subdir) + dps_strlen(P->indname) + dps_strlen(P->basename) +  48;
  if (
      ((P->Ifilename = (char *)DpsMalloc(filenamelen)) == NULL) ||
      ((P->Sfilename = (char *)DpsMalloc(filenamelen)) == NULL)            ) {
    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
    DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error 2x%d bytes %s:%d", filenamelen, __FILE__, __LINE__);
    TRACE_OUT(P->A);
    return DPS_ERROR;
  }
  sprintf(P->Sfilename, "%s/%s/%s%04zx.s", P->vardir, P->subdir, P->basename, P->FileNo);
  sprintf(P->Ifilename, "%s/%s/%s%04zx.i", P->vardir, P->subdir, P->indname, P->FileNo);

  /* Index file: try to open the existing one; if that fails and we are not
     read-only, create it and write an initial hash table into it. */
  if ((P->Ifd = DpsOpen2(P->Ifilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY)) < 0) {
    if ((mode == DPS_READ_LOCK) || ((P->Ifd = DpsOpen3(P->Ifilename, O_RDWR | O_CREAT | DPS_BINARY
/*#ifdef O_DIRECT
		     | O_DIRECT
#endif*/
						   ,
						   S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH
						   )) < 0)) {
      /* A missing file in read mode is reported at debug level only; everything else is an error. */
      dps_strerror(P->A, (mode == DPS_READ_LOCK && errno == ENOENT) ? DPS_LOG_DEBUG : DPS_LOG_ERROR, "Can't open/create file %s for %s [%s:%d]", 
	     P->Ifilename, (mode == DPS_READ_LOCK) ? "read" : "write", __FILE__, __LINE__);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    /* Freshly created index file: hold the per-file lock and a write lock
       while the initial table is written, then release both. */
#if 1
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    DpsWriteLock(P->Ifd);
    /* presumably DpsXmalloc returns zero-filled memory, giving an empty table - TODO confirm */
    if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) {
      DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", sizeof(DPS_BASEITEM) * DPS_HASH_PRIME);
      DpsUnLock(P->Ifd); 
#if 1
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) {
      dps_strerror(P->A, DPS_LOG_ERROR, "Can't set new index for file %s\nwritten %d bytes of %d\nIfd:%d hTable:%x", 
	     P->Ifilename, wr, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME, P->Ifd, hTable);
      DPS_FREE(hTable);
      DpsUnLock(P->Ifd); 
#if 1
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    DpsUnLock(P->Ifd); 
#if 1
    DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    DPS_FREE(hTable);
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
  }
  /* Unless the agent runs with Flags.cold_var set, take the per-file lock and
     the requested lock on the index file; P->locked records that this was done. */
  if (!P->A->Flags.cold_var) {
#if 1
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    switch (mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Ifd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Ifd);
      break;
    }
    P->locked = 1;
  }

  /* Storage file: same open-or-create policy as the index file, but no initial contents are written. */
  if ((P->Sfd = DpsOpen2(P->Sfilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY
/*#ifdef O_DIRECT
		     | O_DIRECT
#endif*/
		     )) < 0) {
    if ((mode == DPS_READ_LOCK) || ((P->Sfd = DpsOpen3(P->Sfilename, O_RDWR | O_CREAT | DPS_BINARY
/*#ifdef O_DIRECT
		     | O_DIRECT
#endif*/
						   , 
						   S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH
						   )) < 0)) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open/create file %s", P->Sfilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
  }
  if (!P->A->Flags.cold_var) {
    switch(mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Sfd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Sfd);
      break;
    }
  }

#ifdef DEBUG_SEARCH
    stop_ticks = DpsStartTimer();
    total_ticks = stop_ticks - start_ticks;
    DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase1 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000);
#endif

    /* Hash lookup. The loop body normally runs once and leaves via `break`;
       it is re-entered (at most 3 passes in total) only through
       `goto search_again` after a broken chain link has been cut below. */
    for (z = 0; z < 3; z++) {

	/* search rec_id */
	if ( (P->CurrentItemPos = (dps_uint8)lseek(P->Ifd, (off_t)(hash * sizeof(DPS_BASEITEM)), SEEK_SET)) == (dps_uint8)-1) {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
	    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	    TRACE_OUT(P->A);
	    return DPS_ERROR;
	}
      /* A failed read of the head slot is logged but not fatal: the item is
         zeroed (so it looks like an empty slot) and the open continues. */
      if (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
	DpsLog(P->A, DPS_LOG_ERROR, "{%s:%d} Can't read index for file %s seek:%ld hash: %u (%d)", 
	       __FILE__, __LINE__, P->Ifilename, P->CurrentItemPos, hash, hash);
	bzero(&P->Item, sizeof(P->Item));
/*	DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	TRACE_OUT(P->A);
	return DPS_ERROR;
*/
      }

#ifdef DEBUG_SEARCH
      stop_ticks = DpsStartTimer();
      total_ticks = stop_ticks - start_ticks;
      DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase2 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000);
#endif

      /* mishash == 1 means the head slot is occupied by a different rec_id, so the chain must be walked. */
      if (P->Item.rec_id == P->rec_id || P->Item.rec_id == 0) P->mishash = 0;
      else P->mishash = 1;
      P->PreviousItemPos = P->CurrentItemPos;
      if (P->mishash)
	/* Follow Item.next until the wanted rec_id is found or the chain ends. */
	while((P->Item.next != 0) && (P->Item.rec_id != P->rec_id)) {
	  P->PreviousItemPos = P->CurrentItemPos;
	  P->CurrentItemPos = P->Item.next;
	  if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
	    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	    TRACE_OUT(P->A);
	    return DPS_ERROR;
	  }
	  if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
	    if (wr == 0) {
	      /* Zero-length read: the link points at or past end of file.
	         Repair by re-reading the previous item, clearing its `next`
	         and writing it back, then restart the lookup. */
	      DpsLog(P->A, DPS_LOG_ERROR, "Possible corrupted hash chain for file %s, trying to restore (%s:%d)", 
		     P->Ifilename, __FILE__, __LINE__);
	      if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't read previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      P->Item.next = 0;
	      if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      /* NOTE(review): this write also happens when the file was opened
	         O_RDONLY (DPS_READ_LOCK); it will then fail and return DPS_ERROR. */
	      if ((wr = write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't write previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      goto search_again;
	
	    } else {
	      /* Short or failed read (not end of file): give up. */
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't read hash chain for file %s %d of %d bytes (%s:%d)", 
		     P->Ifilename, wr, sizeof(DPS_BASEITEM), __FILE__, __LINE__);
	      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	      TRACE_OUT(P->A);
	      return DPS_ERROR;
	    }
	  }
#ifdef DEBUG_SEARCH
	  stop_ticks = DpsStartTimer();
	  total_ticks = stop_ticks - start_ticks;
	  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase3 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000);
#endif
	}
      break;
    search_again:;
    }
  /* Reached both after a normal lookup and after the third repair pass. */
  P->opened = 1;
  P->mode = mode;
#ifdef DEBUG_SEARCH
  stop_ticks = DpsStartTimer();
  total_ticks = stop_ticks - start_ticks;
  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase4 %03X in %.5f sec.\n", P->FileNo, (float)total_ticks / 1000);
#endif
/*  fprintf(stderr, "Sfd:0x%x - %s\n", P->Sfd, P->Sfilename);
  fprintf(stderr, "Ifd:0x%x - %s\n", P->Ifd, P->Ifilename);*/
  TRACE_OUT(P->A);
  return DPS_OK;
}
示例#13
0
/*
 * DpsBaseWrite - store `len` bytes from `buffer` as the record P->rec_id.
 *
 * When built with zlib and P->zlib_method == Z_DEFLATED, the data is
 * compressed first; Item.size then holds the compressed size and
 * Item.orig_size the original length (orig_size stays 0 for data stored
 * uncompressed). The record is located with DpsBaseSeek(P, DPS_WRITE_LOCK):
 *   - an existing record is overwritten in place when the new data fits,
 *     otherwise the data is appended to the storage file;
 *   - a new record is appended, and linked to the end of its hash chain
 *     when the head slot is taken by another rec_id.
 *
 * Returns DPS_OK on success, DPS_ERROR on allocation, compression, seek or
 * write failure (including failure to write the index record).
 */
__C_LINK int __DPSCALL DpsBaseWrite(DPS_BASE_PARAM *P, void *buffer, size_t len) {
  dps_uint8 NewItemPos;
  int res = DPS_OK;
  size_t size = len;
  size_t orig_size = 0;
  void *data = buffer;

#ifdef HAVE_ZLIB
  z_stream zstream;
  Byte *CData = NULL;

  bzero(&zstream, sizeof(zstream));

  zstream.zalloc = Z_NULL;
  zstream.zfree = Z_NULL;
  zstream.opaque = Z_NULL;
  zstream.next_in = buffer;

  if ( (P->zlib_method == Z_DEFLATED)
       && (deflateInit2(&zstream, P->zlib_level, Z_DEFLATED, P->zlib_windowBits, P->zlib_memLevel, P->zlib_strategy) == Z_OK) ) {
    int zrc;

    zstream.avail_in = (uInt)len;
    zstream.avail_out = (uInt)(/*sizeof(gz_header) +*/ 4096 + 2 * len);
    CData = zstream.next_out = (Byte *) DpsMalloc(zstream.avail_out);
    if (zstream.next_out == NULL) {
      /* deflateInit2 succeeded, so its internal state must be released. */
      deflateEnd(&zstream);
      return DPS_ERROR;
    }
    zrc = deflate(&zstream, Z_FINISH);
    deflateEnd(&zstream);
    if (zrc != Z_STREAM_END) {
      /* Anything but Z_STREAM_END means the output is incomplete; do not store it. */
      DpsLog(P->A, DPS_LOG_ERROR, "Can't deflate %ld bytes {%s:%d}", (long)len, __FILE__, __LINE__);
      res = DPS_ERROR;
      goto DpsBaseWrite_exit;
    }
    orig_size = len;
    size = zstream.total_out;
    data = CData;

  }

#endif


  if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) {
    goto DpsBaseWrite_exit;
  }

  if (P->Item.rec_id == P->rec_id) {
    /* Existing record: reuse its slot in the storage file only if the new data fits. */
    if (P->Item.size < size) {
      if ((P->Item.offset = (dps_uint8)lseek(P->Sfd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
    } else {
      if (lseek(P->Sfd, (off_t)P->Item.offset, SEEK_SET) == (off_t)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s offset %ld {%s:%d}", P->Sfilename, (long)P->Item.offset, __FILE__, __LINE__);
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
    }
  } else { /* new rec_id added */
    if (P->mishash && P->Item.rec_id != 0) {
      /* Head slot belongs to another record: append a new index item at the
         end of the index file and link it from the current chain tail. */
      if ((P->Item.next = NewItemPos = (dps_uint8)lseek(P->Ifd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
      if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
      if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
      P->CurrentItemPos = NewItemPos;
      P->Item.next = (off_t)0;
    }
    P->Item.rec_id = P->rec_id;
    if ((P->Item.offset = (dps_uint8)lseek(P->Sfd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
      res = DPS_ERROR;
      goto DpsBaseWrite_exit;
    }
  }
  if (write(P->Sfd, data, size) != (ssize_t)size) {
    dps_strerror(P->A, DPS_LOG_ERROR, "Can't write %ld bytes at %ld of file %s {%s:%d}",
		 (long)size, (long)P->Item.offset, P->Sfilename, __FILE__, __LINE__);
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }
  if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
    DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }

  P->Item.size = size;
  P->Item.orig_size = orig_size;
  if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
    DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
    /* Without the index record the data just written is unreachable: report it. */
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }
/*  DpsBaseFsync(P->A, P);*/
#ifdef DEBUG_SEARCH
  DpsLog(P->A, DPS_LOG_DEBUG, "[%s/%s] Stored rec_id: %x Size: %d", P->subdir, P->basename, P->rec_id, len);
#endif

 DpsBaseWrite_exit:

#ifdef HAVE_ZLIB
  DPS_FREE(CData);
#endif
  return res;
}
示例#14
0
/*
 * DpsBaseRelocate - move every record of a base from its old file layout
 * (Old*Files count) to the new one (*Files count).
 *
 * base_type selects the base: 0 = stored documents, 1 = URL data,
 * 2 = word tree. Any other value is a no-op returning DPS_OK.
 *
 * For each old base file the function collects all non-zero rec_ids from
 * the index, then for each of them reads the record through the old
 * layout, deletes it there and writes it through the new layout. Finally,
 * old files with numbers >= the new file count are unlinked.
 *
 * Returns DPS_OK on completion or when interrupted by SIGTERM/SIGINT/
 * SIGALRM; DPS_ERROR on allocation or seek failure.
 */
extern __C_LINK int __DPSCALL DpsBaseRelocate(DPS_AGENT *Agent, int base_type) {
  DPS_BASE_PARAM O, N;
  DPS_BASE_PARAM *Old = &O, *New = &N;
  size_t base, i, ndel, mdel = 128, data_len;
  urlid_t *todel = (urlid_t*)DpsMalloc(mdel * sizeof(urlid_t));
  void *data;

  /* The list is written through todel[ndel++] below, so it must exist. */
  if (todel == NULL) return DPS_ERROR;

  bzero(Old, sizeof(O));
  bzero(New, sizeof(N));

  switch(base_type) {
  case 0: /* stored */
    Old->subdir = "store";
    Old->basename = "doc";
    Old->indname = "doc";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldStoredFiles", 0x100);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
    New->subdir = "store";
    New->basename = "doc";
    New->indname = "doc";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "StoredFiles", 0x100);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
    DpsLog(Agent, DPS_LOG_INFO, "Relocating stored database");
    break;
  case 1: /* URL data */
    Old->subdir = DPS_URLDIR;
    Old->basename = "info";
    Old->indname = "info";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldURLDataFiles", 0x300);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
#ifdef HAVE_ZLIB
    O.zlib_method = Z_DEFLATED;
    O.zlib_level = 9;
    O.zlib_windowBits = DPS_BASE_INFO_WINDOWBITS;
    O.zlib_memLevel = 9;
    O.zlib_strategy = DPS_BASE_INFO_STRATEGY;
#endif
    New->subdir = DPS_URLDIR;
    New->basename = "info";
    New->indname = "info";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "URLDataFiles", 0x300);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
#ifdef HAVE_ZLIB
    N.zlib_method = Z_DEFLATED;
    N.zlib_level = 9;
    N.zlib_windowBits = DPS_BASE_INFO_WINDOWBITS;
    N.zlib_memLevel = 9;
    N.zlib_strategy = DPS_BASE_INFO_STRATEGY;
#endif
    DpsLog(Agent, DPS_LOG_INFO, "Relocating URLData database");
    break;

  case 2: /* tree wrd */
    Old->subdir = DPS_TREEDIR;
    Old->basename = "wrd";
    Old->indname = "wrd";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldWrdFiles", 0x300);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
#ifdef HAVE_ZLIB
    O.zlib_method = Z_DEFLATED;
    O.zlib_level = 9;
    O.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
    O.zlib_memLevel = 9;
    O.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif
    New->subdir = DPS_TREEDIR;
    New->basename = "wrd";
    New->indname = "wrd";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "WrdFiles", 0x300);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
#ifdef HAVE_ZLIB
    N.zlib_method = Z_DEFLATED;
    N.zlib_level = 9;
    N.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
    N.zlib_memLevel = 9;
    N.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif
    DpsLog(Agent, DPS_LOG_INFO, "Relocating Wrd database");
    break;

  default:
      DPS_FREE(todel);
      return DPS_OK;
  }

  for (base = 0; base < O.NFiles; base++) {
    ndel = 0;
    if (have_sigterm || have_sigint || have_sigalrm) {
      DpsLog(Agent, DPS_LOG_EXTRA, "%s signal received. Exiting chackup", (have_sigterm) ? "SIGTERM" :
	     (have_sigint) ? "SIGINT" : "SIGALRM");
      DpsBaseClose(Old);
      DpsBaseClose(New);
      DPS_FREE(todel);
      return DPS_OK;
    }

    /* Pass 1: open old base file number `base` and collect every rec_id in its index. */
    Old->rec_id = (urlid_t)(base << DPS_BASE_BITS);
    if (DpsBaseOpen(Old, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(Old);
      DpsBaseClose(New);
      continue;
    }
    if (lseek(Old->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(Agent, DPS_LOG_ERROR, "Can't seeek for file %s", Old->Ifilename);
      DpsBaseClose(Old);
      DpsBaseClose(New);
      DPS_FREE(todel);
      return DPS_ERROR;
    }
    while (read(Old->Ifd, &Old->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      if (Old->Item.rec_id != 0) {
	if (ndel >= mdel) {
	  mdel += 128;
	  todel = (urlid_t*)DpsRealloc(todel, mdel * sizeof(urlid_t));
	  if (todel == NULL) {
	    DpsBaseClose(Old);
	    DpsBaseClose(New);
	    DpsLog(Agent, DPS_LOG_ERROR, "Can't realloc %d bytes %s:%d", mdel * sizeof(urlid_t),  __FILE__, __LINE__);
	    DPS_FREE(todel);
	    return DPS_ERROR;
	  }
	}
	todel[ndel++] = Old->Item.rec_id;
      }
    }
    DpsBaseClose(Old);

    /* Pass 2: move each collected record from the old layout to the new one. */
    for (i = 0; i < ndel; i++) {
      Old->rec_id = todel[i];
      data = DpsBaseARead(Old, &data_len);
      if (data == NULL) continue;
      DpsBaseDelete(Old);
      DpsBaseClose(Old);
      New->rec_id = todel[i];
      DpsBaseWrite(New, data, data_len);
      DpsBaseClose(New);
      DPS_FREE(data);
    }
    DpsLog(Agent, DPS_LOG_EXTRA, "\tbase: %d [0x%x], %d records relocated", base, base, ndel);
  }
  DPS_FREE(todel);

  /* Remove old base files whose numbers no longer exist in the new layout. */
  for (base = N.NFiles; base < O.NFiles; base++) {
    Old->rec_id = (urlid_t)(base << DPS_BASE_BITS);
    if (DpsBaseOpen(Old, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(Old);
      continue;
    }
    unlink(O.Ifilename);
    unlink(O.Sfilename);
    DpsBaseClose(Old);
  }
  return DPS_OK;
}
示例#15
0
/*
 * DpsSearchdGetWordResponse - read reply packets from the searchd
 * connection cl->searchd until a WORDS, ERROR or unknown packet arrives
 * (or an allocation fails), filling Res as packets come in.
 *
 * Packets handled: ERROR (message copied to query->Conf->errstr),
 * MESSAGE ("Total_found <n> <m>" sets Res->total_found / grand_total),
 * WORDS, SUGGEST, PERSITE, DATA, TRACKDATA (WITH_REL_TRACK only),
 * WITHOFFSET (ignored), QLC (replaces query variable "q") and WWL
 * (word list appended to Res->WWList).
 *
 * The received word, URL data and track buffers are handed over to
 * Res->CoordList on exit. Returns DPS_OK, or DPS_ERROR on an incomplete
 * header, an ERROR packet or an unknown command.
 *
 * All payloads come from the network, so they are treated as untrusted:
 * error text is length-bounded and message parsing checks for missing
 * fields.
 */
int DpsSearchdGetWordResponse(DPS_AGENT *query,DPS_RESULT *Res,DPS_DB *cl) {
	DPS_URL_CRD_DB *wrd = NULL;
	DPS_URLDATA *udt = NULL;
#ifdef WITH_REL_TRACK
	DPS_URLTRACK *trk = NULL;
#endif
	DPS_SEARCHD_PACKET_HEADER hdr;
	ssize_t	nrecv;
	char	*msg;
	int	done=0, rc = DPS_OK;
	char *wbuf, *p;
	DPS_WIDEWORDLIST_EX *wwl;
	DPS_WIDEWORD ww;
	size_t i;

	TRACE_IN(query, "DpsSearchdGetWordResponse");

	Res->total_found=0;

	while(!done){
		nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);
		if(nrecv!=sizeof(hdr)){
			snprintf(query->Conf->errstr, sizeof(query->Conf->errstr),
				 "Received incomplete header from searchd (%d bytes,errno:%d)",(int)nrecv, errno);
			/* Buffers from earlier packets are not handed to Res on this path: release them. */
			DPS_FREE(wrd);
			DPS_FREE(udt);
#ifdef WITH_REL_TRACK
			DPS_FREE(trk);
#endif
			TRACE_OUT(query);
			return DPS_ERROR;
		}
#ifdef DEBUG_SDP
		DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n",hdr.cmd,hdr.len);
#endif
		switch(hdr.cmd){
			case DPS_SEARCHD_CMD_ERROR:
				msg=(char*)DpsMalloc(hdr.len+1);
				if (msg == NULL) {
					done = 1;
					break;
				}
				nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
				if (nrecv >= 0) {
					msg[nrecv]='\0';
					/* msg length is controlled by the peer: never format it unbounded. */
					snprintf(query->Conf->errstr, sizeof(query->Conf->errstr),
						 "Searchd error: '%s',received:%d", msg, (int)nrecv);
				}
				rc = DPS_ERROR;
				DPS_FREE(msg);
				done=1;
				break;
			case DPS_SEARCHD_CMD_MESSAGE:
				msg=(char*)DpsMalloc(hdr.len+1);
				if (msg == NULL) {
					done = 1;
					break;
				}
				nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
				msg[(nrecv >= 0) ? nrecv : 0] = '\0';
				/* msg + 12 is only inside the string when at least 12 bytes arrived. */
				if (nrecv >= 12 && strncmp(msg, "Total_found", 11) == 0) {
					char *sp = strchr(msg + 12, (int)' ');
					Res->total_found = (size_t)DPS_ATOI(msg + 12);
					/* The second number is optional from our point of view: skip it if absent. */
					if (sp != NULL) Res->grand_total = (size_t)DPS_ATOI(sp + 1);
				}
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
				DPS_FREE(msg);
				break;
			case DPS_SEARCHD_CMD_WORDS:
				DPS_FREE(wrd);
				wrd=(DPS_URL_CRD_DB*)DpsMalloc(hdr.len + 1);
				if (wrd == NULL) {
					done = 1;
					break;
				}
				nrecv = DpsRecvall(cl->searchd, wrd, hdr.len, 360);
				/*Res->total_found=hdr.len/sizeof(*wrd);*/
				Res->num_rows = (nrecv >= 0) ? (size_t)nrecv / sizeof(*wrd) : 0;
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received words size=%d nwrd=%d\n",hdr.len, Res->num_rows /*Res->total_found*/);
#endif
				done=1;
				break;
			case DPS_SEARCHD_CMD_SUGGEST:
				DPS_FREE(Res->Suggest);
				Res->Suggest = (char*)DpsMalloc(hdr.len + 1);
				if (Res->Suggest == NULL) {
					done = 1; break;
				}
				nrecv = DpsRecvall(cl->searchd, Res->Suggest, hdr.len, 360);
				Res->Suggest[(nrecv >=0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received Suggest size=%d\n", hdr.len);
#endif
				break;

			case DPS_SEARCHD_CMD_PERSITE:
				/* NOTE(review): a previous Res->PerSite is overwritten without being
				   freed here (and reset to NULL in the WWL case) - confirm ownership. */
				Res->PerSite = (size_t*)DpsMalloc(hdr.len + 1);
				if (Res->PerSite == NULL) {
					done = 1;
					break;
				}
				nrecv = DpsRecvall(cl->searchd, Res->PerSite, hdr.len, 360);
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received PerSite size=%d nwrd=%d\n", nrecv, Res->num_rows/*Res->total_found*/);
#endif
				break;
			case DPS_SEARCHD_CMD_DATA:
				/* Like WORDS: a repeated packet replaces the previous buffer instead of leaking it. */
				DPS_FREE(udt);
				udt = (DPS_URLDATA*)DpsMalloc(hdr.len + 1);
				if (udt == NULL) {
					done = 1;
					break;
				}
				nrecv = DpsRecvall(cl->searchd, udt, hdr.len, 360);
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received URLDATA size=%d nwrd=%d\n", nrecv, Res->num_rows);
#endif
				break;

#ifdef WITH_REL_TRACK
			case DPS_SEARCHD_CMD_TRACKDATA:
				DPS_FREE(trk);
				trk = (DPS_URLTRACK*)DpsMalloc(hdr.len + 1);
				if (trk == NULL) {
					done = 1;
					break;
				}
				nrecv = DpsRecvall(cl->searchd, trk, hdr.len, 360);
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received TRACKDATA size=%d nwrd=%d\n", nrecv, Res->num_rows);
#endif
				break;
#endif

			case DPS_SEARCHD_CMD_WITHOFFSET:
/*				Res->offset = 1;*/
				break;
			case DPS_SEARCHD_CMD_QLC:
				if ((p = (char *)DpsXmalloc(hdr.len + 1)) != NULL) {
					/* Only a positive count means data arrived; a negative result is an error. */
					if (DpsRecvall(cl->searchd, p, hdr.len, 360) > 0)  {
						DpsVarListReplaceStr(&query->Vars, "q", p);
					}
				}
				DPS_FREE(p);
				break;
			case DPS_SEARCHD_CMD_WWL:
				Res->PerSite = NULL;
				if ((wbuf = p = (char *)DpsXmalloc(hdr.len + 1)) != NULL) {
					nrecv = DpsRecvall(cl->searchd, wbuf, hdr.len, 360);
					/* The list header must be fully present before it is dereferenced. */
					if (nrecv > 0 && (size_t)nrecv >= sizeof(DPS_WIDEWORDLIST_EX)) {
						wwl = (DPS_WIDEWORDLIST_EX *)p;
						p += sizeof(DPS_WIDEWORDLIST_EX);
#ifdef DEBUG_SDP
						DpsLog(query, DPS_LOG_ERROR, "wbuf :%x, wwl: %x, p: %x hdr.len:%d\n", wbuf, wwl, p, hdr.len);
						DpsLog(query, DPS_LOG_ERROR, "Received WWL nwords=%d nuniq=%d\n", wwl->nwords, wwl->nuniq);
#endif
/*						DpsWideWordListFree(&Res->WWList);*/
						/* Each entry: fixed DPS_WIDEWORD_EX part, NUL-terminated word,
						   padding to dpsunicode_t alignment, then the unicode word.
						   NOTE(review): entry bounds are not checked against nrecv. */
						for(i = 0; i < wwl->nwords; i++) {
							dps_memcpy((char*)&ww, p, sizeof(DPS_WIDEWORD_EX));
							p += sizeof(DPS_WIDEWORD_EX);
							ww.word = p;
#ifdef DEBUG_SDP
							DpsLog(query, DPS_LOG_ERROR, "Word {%d}: %s\n", ww.len+1, ww.word);
#endif
							p += ww.len + 1;
							p += sizeof(dpsunicode_t) - ((SDPALIGN)p % sizeof(dpsunicode_t));
							ww.uword = (dpsunicode_t*)p;
							p += sizeof(dpsunicode_t) * (ww.ulen + 1);
							DpsWideWordListAdd(&Res->WWList, &ww, DPS_WWL_STRICT);
						}
						Res->WWList.nuniq = wwl->nuniq;
					}
					/* Freed on every path, including an empty or failed receive. */
					DPS_FREE(wbuf);
				}
				break;
			default:
				snprintf(query->Conf->errstr, sizeof(query->Conf->errstr),
					 "Unknown searchd response: cmd=%d len=%d",hdr.cmd,hdr.len);
				rc = DPS_ERROR;
				done=1;
				break;
		}
	}
	Res->CoordList.Coords = wrd;
	Res->CoordList.Data = udt;
#ifdef WITH_REL_TRACK
	Res->CoordList.Track = trk;
#endif
	TRACE_OUT(query);
	return rc;
}
示例#16
0
/*
 * DpsSynonymListLoad - parse a synonyms file and append its entries to
 * Env->Synonyms.
 *
 * File format, one command per line:
 *   - lines starting with '#', blank or whitespace are skipped;
 *   - "Charset: <name>"   selects the file charset (required before words);
 *   - "Language: <lang>"  sets the language (required before words);
 *   - "Thesaurus: yes|..." toggles thesaurus mode: when on, every pair from
 *     a line is tagged with that line's key in p.count / s.count;
 *   - any other line is a list of words; every pair of words on the line
 *     is added in both directions.
 *
 * Returns DPS_OK on success. Returns DPS_ERROR when the file cannot be
 * stat'ed, opened or read, when the charset is unknown, when a word line
 * precedes Charset/Language, or on allocation failure. Every exit path
 * after the file is opened releases the file buffer, the word array and
 * the descriptor.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
     struct stat     sb;
     char      *str, *data = NULL, *cur_n = NULL;
     char      lang[64]="";
     DPS_CHARSET    *cs=NULL;
     DPS_CHARSET    *sys_int=DpsGetCharSet("sys-int");
     DPS_CONV  file_uni;
     DPS_WIDEWORD    *ww = NULL;
     size_t key = 1;
     int flag_th = 0;
     int             fd;
     int             rc = DPS_OK;
     char            savebyte = '\0';

     if (stat(filename, &sb)) {
       fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     /* 0 is a valid descriptor; only a negative value signals failure. */
     if ((fd = DpsOpen2(filename, O_RDONLY)) < 0) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", sb.st_size);
       rc = DPS_ERROR;
       goto done;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
       rc = DPS_ERROR;
       goto done;
     }
     data[sb.st_size] = '\0';

     /* Lines are processed in place: the byte after each '\n' is temporarily
        replaced by '\0' (saved in savebyte) so `str` is one terminated line. */
     str = data;
     cur_n = strchr(str, '\n');
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }

     while(str != NULL) {
          if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue;

          if(!strncasecmp(str,"Charset:",8)){
               char * lasttok;
               char * charset;
               if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
                    cs=DpsGetCharSet(charset);
                    if(!cs){
                         dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                                   charset, filename);
                         rc = DPS_ERROR;
                         goto done;
                    }
                    DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
               }
          }else
          if(!strncasecmp(str,"Language:",9)){
               char * lasttok;
               char * l;
               if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
                    dps_strncpy(lang, l, sizeof(lang)-1);
               }
          }else
          if(!strncasecmp(str, "Thesaurus:", 10)) {
               char * lasttok;
               char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
               /* "Thesaurus:" with no value yields no token: treat it as "no". */
               flag_th = (tok != NULL && strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
          }else{
               char      *av[255];
               size_t         ac, i, j, k;
               dpsunicode_t *t;

               if(!cs){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
                    rc = DPS_ERROR;
                    goto done;
               }
               if(!lang[0]){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
                    rc = DPS_ERROR;
                    goto done;
               }

               ac = DpsGetArgs(str, av, 255);
               if (ac < 2) goto loop_continue;

               if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) {
                    rc = DPS_ERROR;
                    goto done;
               }

               /* Convert every word of the line to lower-cased, NFC-normalized unicode. */
               for (i = 0; i < ac; i++) {
                 ww[i].word = av[i];
                 ww[i].len = dps_strlen(av[i]);
                 ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
                 if (ww[i].uword == NULL) {
                   /* Release the words already converted on this line. */
                   for (k = 0; k < i; k++) {
                     DPS_FREE(ww[k].uword);
                   }
                   rc = DPS_ERROR;
                   goto done;
                 }
                 DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
                 DpsUniStrToLower(ww[i].uword);
                 ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
                 DPS_FREE(t);
               }

               /* Register every unordered pair (i, j) in both directions. */
               for (i = 0; i < ac - 1; i++) {
                 for (j = i + 1; j < ac; j++) {

                   /* Two entries are appended per pair, hence the "+ 1". */
                   if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
                    Env->Synonyms.msynonyms += 64;
                    Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym,
                                                   sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
                    if (Env->Synonyms.Synonym == NULL) {
                      Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
                      for (k = 0; k < ac; k++) {
                        DPS_FREE(ww[k].uword);
                      }
                      rc = DPS_ERROR;
                      goto done;
                    }
                   }

                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));

                   /* Add direct order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count =
                     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;

                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));

                   /* Add reverse order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count =
                     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
                 }
               }

               for (i = 0; i < ac; i++) {
                 DPS_FREE(ww[i].uword);
               }
               /* Next line gets a new key; 0 is skipped because it marks "no thesaurus group". */
               do { key++; } while (key == 0);
          }
     loop_continue:
          /* Advance to the next line: restore the byte overwritten by the
             terminator and terminate the following line the same way. */
          str = cur_n;
          if (str != NULL) {
            *str = savebyte;
            cur_n = strchr(str, '\n');
            if (cur_n != NULL) {
              cur_n++;
              savebyte = *cur_n;
              *cur_n = '\0';
            }
          }
     }

done:
     DPS_FREE(data);
     DPS_FREE(ww);
     DpsClose(fd);
     return rc;
}
示例#17
0
/*
 * DpsFindWordsSearchd - build the word-search request string from the
 * query variables and send it to the searchd node `searchd`.
 *
 * The request is the QUERY_STRING followed by BrowserCharset, IP, g-lc,
 * ExcerptSize, ExcerptPadding, DoExcerpt, tmplt, the optional URL-escaped
 * DateFormat and empty values, the optional label of the searchd node,
 * and sp / sy / s. Only the request is sent here; the response is read
 * later (see the note near the end).
 *
 * Res is not used by this function. Returns the result of
 * DpsSearchdSendWordRequest, or DPS_ERROR (with query->Conf->errstr set)
 * when memory cannot be allocated.
 */
int __DPSCALL DpsFindWordsSearchd(DPS_AGENT *query, DPS_RESULT *Res, DPS_DB *searchd) {
	size_t		maxlen = 1024;
	char		*request, *edf = NULL, *e_empty = NULL;
	const char *df = DpsVarListFindStr(&query->Vars, "DateFormat", NULL);
	const char *empty = DpsVarListFindStr(&query->Vars, "empty", NULL);
	const char *qs = DpsVarListFindStr(&query->Vars, "QUERY_STRING", "");
	const char *tmplt = DpsVarListFindStr(&query->Vars, "tmplt", "");
	int		res=DPS_OK;

	TRACE_IN(query, "DpsFindWordsSearchd");

	/* Escaped copies are sized at 10 bytes per source byte plus the terminator. */
	if (df) {
	  edf = (char*)DpsMalloc(dps_strlen(df) * 10 + 1);
	  if (edf == NULL) {
		sprintf(query->Conf->errstr,"Can't allocate memory");
		TRACE_OUT(query);
		return DPS_ERROR;
	  }
	  DpsEscapeURL(edf, df);
	  maxlen += dps_strlen(edf);
	}
	if (empty) {
	  e_empty = (char*)DpsMalloc(dps_strlen(empty) * 10 + 1);
	  if (e_empty == NULL) {
		sprintf(query->Conf->errstr, "Can't allocate memory");
		DPS_FREE(edf);
		TRACE_OUT(query);
		return DPS_ERROR;
	  }
	  DpsEscapeURL(e_empty, empty);
	  maxlen += dps_strlen(e_empty);
	}

	maxlen += dps_strlen(qs) + dps_strlen(tmplt) + 64;
	/* The label is appended verbatim, so its length must be budgeted too. */
	if (searchd->label) maxlen += dps_strlen(searchd->label);

	if (NULL==(request=(char*)DpsMalloc(maxlen))) {
		sprintf(query->Conf->errstr,"Can't allocate memory");
		DPS_FREE(edf);
		DPS_FREE(e_empty);
		TRACE_OUT(query);
		return DPS_ERROR;
	}

	dps_snprintf(request, maxlen, "%s&BrowserCharset=%s&IP=%s&g-lc=%s&ExcerptSize=%s&ExcerptPadding=%s&DoExcerpt=%s&tmplt=%s%s%s%s%s%s%s&sp=%s&sy=%s&s=%s",
		  qs,
		  DpsVarListFindStr(&query->Vars, "BrowserCharset", "iso-8859-1"),
		  DpsVarListFindStr(&query->Vars, "IP", "localhost"),
		  DpsVarListFindStr(&query->Vars, "g-lc", "en"),
		  DpsVarListFindStr(&query->Vars, "ExcerptSize", "256"),
		  DpsVarListFindStr(&query->Vars, "ExcerptPadding", "40"),
		  (query->Flags.do_excerpt) ? "yes" : "no",
		  tmplt,
		  (edf) ? "&DateFormat=" : "", (edf) ? edf : "",
		  (e_empty) ? "&empty=" : "", (e_empty) ? e_empty : "",
		  (searchd->label) ? "&label=" : "", (searchd->label) ? searchd->label : "",
		  DpsVarListFindStr(&query->Vars, "sp", "1"),
		  DpsVarListFindStr(&query->Vars, "sy", "1"),
		  DpsVarListFindStr(&query->Vars, "s", "RP")
		  );
	DPS_FREE(edf);
	DPS_FREE(e_empty);

	/* Belt and braces: guarantee termination even if the formatter truncated. */
	request[maxlen-1]='\0';
	res = DpsSearchdSendWordRequest(query, searchd, request);
	DPS_FREE(request);

/*	res = DpsSearchdGetWordResponse(query, Res, searchd);   called later from DpsFind */

	TRACE_OUT(query);
	return res;
}
示例#18
0
/*
 * DpsCookiesFind - assemble the "Cookie" request header for Doc.
 *
 * Indexer   agent owning the in-memory cookie cache (Indexer->Cookies),
 *           the configuration and the database list.
 * Server    server record; may be NULL. When it carries an "AuthPing"
 *           variable, an authentication ping is issued before the database
 *           lookup (see below).
 * Doc       document being fetched; its CurURL schema/path are used for
 *           matching and its RequestHeaders receive the "Cookie" value.
 * hostinfo  host part the cookies are looked up for.
 *
 * Steps, all compiled only when HAVE_SQL is defined:
 *  1. Every cached cookie whose domain equals the tail of hostinfo, whose
 *     path is a prefix of the document path and whose "secure" flag agrees
 *     with the URL schema is appended as "name=value", joined with "; ".
 *  2. If the document's "have_no_cookies" section is non-zero (default 1)
 *     and no matching cached cookie came from outside the configuration:
 *       a. optional AuthPing: the base64-decoded "AuthPing" value has the
 *          form "GET|POST <url> <body>". A HEAD request to the site root is
 *          sent first, then the ping itself; response headers of both are
 *          handed to DpsDocProcessResponseHeaders().
 *       b. the "cookies" table is queried for hostinfo and then for each
 *          parent domain (text after each successive '.'); rows are added
 *          to the cache with DpsCookiesAdd() and matching ones appended to
 *          the header. When a domain has no rows an empty placeholder
 *          cookie is cached for it.
 *  3. A non-empty result replaces the "Cookie" entry of Doc->RequestHeaders.
 *
 * Returns nothing; on allocation failure inside the AuthPing step the
 * function releases what it holds and returns without setting the header.
 *
 * NOTE(review): WITH_PARANOIA calls DpsViolationEnter() here but no matching
 * exit call is visible in this function - confirm whether one is required.
 */
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif
  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);

  /* Pass 1: cookies already held in the agent's cache. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    /* Secure cookies are sent only over https; a missing schema counts as non-https. */
    if (Coo->secure == 'y' && strcasecmp(DPS_NULL2EMPTY(Doc->CurURL.schema), "https")) continue;
    /* Cookie path must be a prefix of the document path. */
    if (strncasecmp(Coo->path, DPS_NULL2EMPTY(Doc->CurURL.path), dps_strlen(Coo->path))) continue;
    /* Cookie domain must equal the tail of hostinfo. */
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    /* Entries with empty name and value are placeholders: nothing to send. */
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size) {
      DpsDSTRAppend(&cookie, "; ", 2);
    }
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }

  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows;

    /* Pass 2a: optional authentication ping configured on the server. */
    if (Server != NULL) {
      char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);
      if (PingData != NULL) {
        int method = DPS_METHOD_GET;
        char *AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));

        if (AuthPing == NULL) {
          DpsDSTRFree(&cookie);
          TRACE_OUT(Indexer);
          return;
        }
        dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));
        if (!strncasecmp(AuthPing, "GET", 3)) {
          method = DPS_METHOD_GET;
          PingData = DpsTrim(AuthPing + 3, " \t\r\n");
        } else if (!strncasecmp(AuthPing, "POST", 4)) {
          method = DPS_METHOD_POST;
          PingData = DpsTrim(AuthPing + 4, " \t\r\n");
        } else {
          DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
          PingData = NULL;
        }
        if (PingData != NULL) {
          size_t size = dps_strlen(PingData);
          {
            /*
             * Both tokens are cut out of PingData, so neither can exceed
             * `size` characters; PingURL has one spare byte for the '?'
             * that replaces the separating whitespace in the GET case.
             */
            char PingURL[size + 2];
            char PingBody[size + 1];
            DPS_DOCUMENT *rDoc;

            /* sscanf may match fewer than two tokens: start from empty strings. */
            PingURL[0] = '\0';
            PingBody[0] = '\0';

            rDoc = DpsDocInit(NULL);
            if (rDoc == NULL) {
              DpsFree(AuthPing);
              DpsDSTRFree(&cookie);
              TRACE_OUT(Indexer);
              return;
            }
            DpsSpiderParamInit(&rDoc->Spider);
            DpsVarList2Doc(rDoc, Server);
            rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
            rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
            if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
              DpsDocFree(rDoc);
              DpsFree(AuthPing);
              DpsDSTRFree(&cookie);
              TRACE_OUT(Indexer);
              return;
            }
            rDoc->Buf.buf[0] = '\0';
            rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

            /* First request: HEAD on the site root, only its headers are of interest. */
            dps_snprintf(buf, sizeof(buf), "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
            DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
            DpsURLParse(&rDoc->CurURL, buf);
            DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
            rDoc->method = DPS_METHOD_HEAD;
            if (Doc != NULL) {
              DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*");
            }

            DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
            DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
            DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
            DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
            DpsDocAddServExtraHeaders(Server, rDoc);
            DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
            if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
              DpsDocLookupConn(Indexer, rDoc);
            } else {
              /* Borrow the connection of the document being fetched. */
              DPS_FREE(rDoc->connp.connp);
              rDoc->connp = Doc->connp;
            }
            (void)DpsGetURL(Indexer, rDoc, NULL); /* Just get headers from the home as we need only Cookies from it */
            DpsDocProcessResponseHeaders(Indexer, rDoc);
            DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");

            /* Second request: the ping itself, "<url> <body>". */
            sscanf(PingData, "%s %s", PingURL, PingBody);
            /*
             * Decide on the parsed AuthPing method: rDoc->method still holds
             * DPS_METHOD_HEAD from the home request at this point.
             */
            if (method == DPS_METHOD_GET) {
              if (PingBody[0] != '\0') {
                dps_strcat(PingURL, "?");
                dps_strcat(PingURL, PingBody);
              }
            } else {
              DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
            }
            DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
            DpsURLParse(&rDoc->CurURL, PingURL);
            DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);

            rDoc->method = method;
            DpsVarListFree(&rDoc->RequestHeaders);
            DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
            DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
            DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
            DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
            DpsDocAddServExtraHeaders(Server, rDoc);
            if (method == DPS_METHOD_POST) {
              dps_snprintf(buf, sizeof(buf), "application/x-www-form-urlencoded; charset=%s", DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
              DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
              /* "%d" expects an int, dps_strlen() yields a size_t. */
              dps_snprintf(buf, sizeof(buf), "%d", (int)dps_strlen(PingBody));
              DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
            }

            DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");

            (void)DpsGetURL(Indexer, rDoc, NULL); /* Just get it as we need only Cookies from the headers */
            DpsDocProcessResponseHeaders(Indexer, rDoc);
            DpsVarListDel(&rDoc->Sections, "body");
            DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");
            /* Detach the connection data before rDoc is freed; it may be shared with Doc. */
            if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
            DpsDocFree(rDoc);
          }
        }
        DpsFree(AuthPing);
      }
    }

    /* Pass 2b: stored cookies for hostinfo and each of its parent domains. */
    while (hostinfo != NULL) {
      url_id = DpsStrHash32(hostinfo);
      DpsSQLResInit(&Res);
      /*
       * NOTE(review): hostinfo is interpolated into the statement unescaped.
       * If it can contain a quote character this is an SQL injection vector;
       * escape it with the project's SQL escaping helper before formatting.
       */
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", hostinfo);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_GETLOCK(Indexer, DPS_LOCK_DB);
        db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
      } else {
        db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
      }
      if (DPS_OK == DpsSQLQuery(db, &Res, buf)) {
        rows = DpsSQLNumRows(&Res);
        for (i = 0; i < rows; i++) {
          /* Columns: 0 = name, 1 = value, 2 = path, 3 = secure. */
          DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1),
                        *DpsSQLValue(&Res, i, 3), 0, 0, 0);
          if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(DPS_NULL2EMPTY(Doc->CurURL.schema), "https")) continue;
          if (strncasecmp(DpsSQLValue(&Res, i, 2), DPS_NULL2EMPTY(Doc->CurURL.path), dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
          if (cookie.data_size) {
            DpsDSTRAppend(&cookie, "; ", 2);
          }
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
          DpsDSTRAppend(&cookie, "=", 1);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
        }
        if (rows == 0) {
          /* Cache an empty placeholder for a domain that has no stored cookies. */
          DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
        }
      }
      DpsSQLFree(&Res);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }
      /* Move on to the parent domain: the text after the next '.'. */
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }

  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }
  DpsDSTRFree(&cookie);
#endif
  TRACE_OUT(Indexer);
  return;
}