Example #1
0
/*
 * Queue a URL in the shared target list and register it through the URL
 * actions.
 *
 * Indexer - agent whose Conf->Targets list is extended
 * url     - URL to append; compared case-insensitively against queued URLs
 * lang    - Accept-Language value for the request; "" means "none" and is
 *           not stored (must not be NULL, it is dereferenced)
 * hops    - stored in the "Hops" section
 * parent  - stored in the "Referrer-ID" section
 *
 * If a target with the same URL and Accept-Language is already queued the
 * function returns without doing anything.  Otherwise the list is grown by
 * one slot, the slot is filled in, and it is kept (num_rows is incremented)
 * only when the FINDBYURL action succeeds and yields a non-zero "DP_ID".
 * The ADD action is issued afterwards in every case.
 *
 * DPS_LOCK_THREAD is held for the whole call; DPS_LOCK_CONF is held while
 * the target list is inspected and modified.
 */
void DpsAppendTarget(DPS_AGENT *Indexer, const char *url, const char *lang, const int hops, int parent) {
  DPS_DOCUMENT *Doc, *Grown;
  size_t i;

  TRACE_IN(Indexer, "AppendTarget");

  DPS_GETLOCK(Indexer, DPS_LOCK_THREAD);
  DPS_GETLOCK(Indexer, DPS_LOCK_CONF);

  /*
   * Walk the queue from the newest entry down to slot 0.  The "i-- > 0"
   * form is used because i is unsigned: a plain "i > 0" test never visits
   * slot 0 (so a duplicate of the first target would be queued again), and
   * "i >= 0" would never terminate.
   */
  for (i = Indexer->Conf->Targets.num_rows; i-- > 0; ) {
    Doc = &Indexer->Conf->Targets.Doc[i];
    if ((strcasecmp(DpsVarListFindStr(&Doc->Sections, "URL", ""), url) == 0)
        && (strcmp(DpsVarListFindStr(&Doc->RequestHeaders, "Accept-Language", ""), lang) == 0)) {
      /* Already queued with the same language: nothing to do. */
      DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
      DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
      TRACE_OUT(Indexer);
      return;
    }
  }

  /* Grow into a temporary so the list pointer is only replaced on success. */
  Grown = DpsRealloc(Indexer->Conf->Targets.Doc,
                     (Indexer->Conf->Targets.num_rows + 1) * sizeof(DPS_DOCUMENT));
  if (Grown == NULL) {
    DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
    DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
    TRACE_OUT(Indexer);
    return;
  }
  Indexer->Conf->Targets.Doc = Grown;

  /* Fill in the new slot; it is not part of the list until num_rows grows. */
  Doc = &Indexer->Conf->Targets.Doc[Indexer->Conf->Targets.num_rows];
  DpsDocInit(Doc);
  DpsVarListAddStr(&Doc->Sections, "URL", url);
  DpsVarListAddInt(&Doc->Sections, "Hops", hops);
  DpsVarListDel(&Doc->Sections, "URL_ID");
  DpsVarListReplaceInt(&Doc->Sections, "Referrer-ID", parent);
  if (*lang != '\0') DpsVarListAddStr(&Doc->RequestHeaders, "Accept-Language", lang);

  if (DPS_OK == DpsURLAction(Indexer, Doc, DPS_URL_ACTION_FINDBYURL)) {
    urlid_t url_id = DpsVarListFindInt(&Doc->Sections, "DP_ID", 0);
    if (url_id != 0) {
      Indexer->Conf->Targets.num_rows++;
    } else {
      /*
       * NOTE(review): Doc is released here but is still handed to the ADD
       * action below.  Left in the original order because the intended
       * semantics of ADD on a released document are not visible from this
       * function - confirm, and consider deferring the release until after
       * ADD.
       */
      DpsDocFree(Doc);
    }
  }
  /* NOTE(review): when FINDBYURL fails the slot is neither kept nor freed - confirm. */

  DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
  DpsURLAction(Indexer, Doc, DPS_URL_ACTION_ADD);
  DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
  TRACE_OUT(Indexer);
  return;
}
Example #2
0
/*
 * Ask a searchd peer for document info and merge the answer into Res.
 *
 * query - agent used for logging and for Conf->errstr
 * cl    - database handle; cl->searchd is the connection that is used
 * Res   - result set whose documents are serialised, sent, and then updated
 *         from the reply
 * clnum - not used by this function
 *
 * Every document in Res (only those with a matching dbnum when
 * WITH_MULTIDBADDR is defined) is serialised with DpsDocToTextBuf and the
 * pieces are joined with "\r\n" into one DOCINFO packet.  Replies are then
 * read until a DOCINFO, an ERROR or an unknown command arrives; MESSAGE
 * packets are consumed and skipped.  Each line of a DOCINFO reply is parsed
 * and applied to the first document in Res that has the same "DP_ID".
 *
 * Returns DPS_OK on success (or when there is nothing to send) and
 * DPS_ERROR on serialisation failure, allocation failure, an incomplete
 * header, a searchd-reported error or an unknown reply command.
 */
int __DPSCALL DpsResAddDocInfoSearchd(DPS_AGENT * query,DPS_DB *cl,DPS_RESULT * Res,size_t clnum){
	DPS_SEARCHD_PACKET_HEADER hdr;
	char *msg = NULL;
	size_t i;
	int done = 0;
	ssize_t nrecv;
	char *dinfo = NULL;	/* request buffer, later reused for the reply */
	int rc = DPS_OK;
	char *textbuf;
	size_t dlen = 0;	/* bytes of request text accumulated so far */

	TRACE_IN(query, "DpsResAddDocInfoSearchd");

	if(!Res->num_rows) { TRACE_OUT(query); return(DPS_OK); }

	for(i = 0; i < Res->num_rows; i++){
	  size_t ulen;
	  size_t olen;
	  size_t nsec, r;
	  DPS_DOCUMENT *D = &Res->Doc[i];

	  /* Set section = 1 on every "Score" variable in the 's' bucket. */
	  r = (size_t) 's';
	  for(nsec = 0; nsec < D->Sections.Root[r].nvars; nsec++)
	    if (strcasecmp(D->Sections.Root[r].Var[nsec].name, "Score") == 0) D->Sections.Root[r].Var[nsec].section = 1;

#ifdef WITH_MULTIDBADDR
	  if (D->dbnum != cl->dbnum) continue;
#endif

	  textbuf = DpsDocToTextBuf(D, 1, 0);
	  if (textbuf == NULL) {
	    /* Drop what earlier iterations accumulated instead of leaking it. */
	    DPS_FREE(dinfo);
	    TRACE_OUT(query);
	    return DPS_ERROR;
	  }

	  ulen = dps_strlen(textbuf) + 2;	/* text plus "\r\n" */
	  olen = dlen;
	  dlen = dlen + ulen;
	  /*
	   * NOTE(review): on failure the previous buffer is not freed here
	   * because it is not visible whether DpsRealloc already releases it -
	   * confirm against DpsRealloc's contract.
	   */
	  dinfo = (char*)DpsRealloc(dinfo, dlen + 1);
	  if (dinfo == NULL) {
	    DpsFree(textbuf);
	    TRACE_OUT(query);
	    return DPS_ERROR;
	  }
	  dinfo[olen] = '\0';
	  sprintf(dinfo + olen, "%s\r\n", textbuf);
	  DpsFree(textbuf);
	}

	/* Nothing was serialised (every document was skipped). */
	if (dinfo == NULL) {
	    TRACE_OUT(query);
	    return DPS_OK;
	}

	hdr.cmd = DPS_SEARCHD_CMD_DOCINFO;
	hdr.len = dps_strlen(dinfo);

	/* The send result is not inspected here; replies are validated below. */
	(void)DpsSearchdSendPacket(cl->searchd, &hdr, dinfo);
#ifdef DEBUG_SDP
	DpsLog(query, DPS_LOG_ERROR, "Sent DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif

	while(!done){
		char *tok, *lt;

		nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);

		if(nrecv != (ssize_t)sizeof(hdr)){
			/* Log first so errno still belongs to the failed receive. */
			DpsLog(query, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes, errno:%d)", (int)nrecv, errno);
			DPS_FREE(dinfo);
			TRACE_OUT(query);
			return(DPS_ERROR);
		}
#ifdef DEBUG_SDP
		DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n",hdr.cmd,hdr.len);
#endif

		switch(hdr.cmd){
			case DPS_SEARCHD_CMD_ERROR:
				rc = DPS_ERROR;
				done = 1;
				msg = (char*)DpsMalloc(hdr.len + 1);
				if (msg == NULL) break;
				nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
				msg[(nrecv >= 0) ? nrecv : 0] = '\0';
				/*
				 * The message length comes from the peer, so the copy is
				 * bounded.  NOTE(review): sizeof assumes errstr is a
				 * fixed-size char array - confirm against its declaration.
				 */
				snprintf(query->Conf->errstr, sizeof(query->Conf->errstr), "Searchd error: '%s'", msg);
				DPS_FREE(msg);
				break;

			case DPS_SEARCHD_CMD_MESSAGE:
				msg = (char*)DpsMalloc(hdr.len + 1);
				if (msg == NULL) {
				  rc = DPS_ERROR;
				  done = 1;
				  break;
				}
				nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
				msg[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
				DPS_FREE(msg);
				/* Informational only: keep waiting for the real reply. */
				break;

			case DPS_SEARCHD_CMD_DOCINFO:
				done = 1;
				/* Reuse the request buffer for the reply payload. */
				dinfo = (char*)DpsRealloc(dinfo, hdr.len + 1);
				if (dinfo == NULL) {
				  rc = DPS_ERROR;
				  break;
				}
				nrecv = DpsRecvall(cl->searchd, dinfo, hdr.len, 360);
				dinfo[(nrecv > 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
				DpsLog(query, DPS_LOG_ERROR, "Received DOCINFO size=%d buf=%s\n",hdr.len,dinfo);
#endif
				/* One serialised document per "\r\n"-separated token. */
				for (tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);
				     tok != NULL;
				     tok = dps_strtok_r(NULL, "\r\n", &lt, NULL)) {
					urlid_t Doc_url_id, Res_Doc_url_id;
					DPS_DOCUMENT Doc;

					/* Parse into a scratch document just to learn its DP_ID. */
					DpsDocInit(&Doc);
					DpsDocFromTextBuf(&Doc, tok);
					Doc_url_id = (urlid_t)DpsVarListFindInt(&Doc.Sections, "DP_ID", 0);

					for(i = 0; i < Res->num_rows; i++){
#ifdef WITH_MULTIDBADDR
						if (Res->Doc[i].dbnum != cl->dbnum) continue;
#endif
						Res_Doc_url_id = (urlid_t)DpsVarListFindInt(&Res->Doc[i].Sections, "DP_ID", 0);
						if (Res_Doc_url_id == Doc_url_id) {
						  DpsDocFromTextBuf(&Res->Doc[i], tok);
						  break;
						}
					}
					DpsDocFree(&Doc);
				}
				break;

			default:
				sprintf(query->Conf->errstr,"Unknown searchd response: cmd=%d len=%d",hdr.cmd,hdr.len);
				rc = DPS_ERROR;
				done = 1;
				break;
		}
	}

	/* Single release point for the request/reply buffer on every exit path. */
	DPS_FREE(dinfo);
	TRACE_OUT(query);
	return rc;
}
Example #3
0
#ifdef HAVE_SQL
/*
 * Perform the optional "AuthPing" exchange configured for Server so that
 * cookies set by the site can be picked up from the response headers.
 *
 * The "AuthPing" server variable is base64-decoded and must read
 * "GET <url> <body>" or "POST <url> <body>".  A HEAD request for the site
 * root of Doc is issued first, then the ping itself: for GET the body is
 * appended to the URL as a query string, for POST it is sent as an
 * urlencoded body.
 *
 * buf / buf_size - caller-provided scratch buffer used for formatting.
 *
 * Returns 0 when no ping is configured, the ping line is rejected, or the
 * exchange was attempted; -1 when an allocation failed and the caller
 * should abandon the cookie lookup.
 */
static int CookiesAuthPing(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, char *buf, size_t buf_size) {
  char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);
  char *AuthPing;
  int method = DPS_METHOD_GET;

  if (PingData == NULL) return 0;

  AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));
  if (AuthPing == NULL) return -1;
  dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));

  if (!strncasecmp(AuthPing, "GET", 3)) {
    method = DPS_METHOD_GET;
    PingData = DpsTrim(AuthPing + 3, " \t\r\n");
  } else if (!strncasecmp(AuthPing, "POST", 4)) {
    method = DPS_METHOD_POST;
    PingData = DpsTrim(AuthPing + 4, " \t\r\n");
  } else {
    DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
    PingData = NULL;
  }

  if (PingData != NULL) {
    size_t size = dps_strlen(PingData);
    /*
     * Both tokens come out of PingData, so neither can exceed size chars.
     * PingURL needs room for one extra '?'; PingBody gets +1 so it is never
     * a zero-length array when PingData is empty.
     */
    char PingURL[size + 2];
    char PingBody[size + 1];
    DPS_DOCUMENT *rDoc;

    /* sscanf may match fewer than two tokens; never read garbage below. */
    PingURL[0] = '\0';
    PingBody[0] = '\0';

    rDoc = DpsDocInit(NULL);
    if (rDoc == NULL) {
      DpsFree(AuthPing);
      return -1;
    }
    DpsSpiderParamInit(&rDoc->Spider);
    DpsVarList2Doc(rDoc, Server);
    rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
    rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
    if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
      DpsDocFree(rDoc);
      DpsFree(AuthPing);
      return -1;
    }
    rDoc->Buf.buf[0] = '\0';
    rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

    /* Step 1: HEAD request for the site root; only its headers matter. */
    dps_snprintf(buf, buf_size, "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
    DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
    DpsURLParse(&rDoc->CurURL, buf);
    DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
    rDoc->method = DPS_METHOD_HEAD;
    if (Doc != NULL) {
      DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*");
    }

    DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
    DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
    DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
    DpsDocAddServExtraHeaders(Server, rDoc);
    DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
    if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
      DpsDocLookupConn(Indexer, rDoc);
    } else {
      /* Borrow the connection of Doc; it is detached again before rDoc is freed. */
      DPS_FREE(rDoc->connp.connp);
      rDoc->connp = Doc->connp;
    }
    (void)DpsGetURL(Indexer, rDoc, NULL);
    DpsDocProcessResponseHeaders(Indexer, rDoc);
    DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");

    /* Step 2: the ping itself. */
    sscanf(PingData, "%s %s", PingURL, PingBody);
    /*
     * Decide on the parsed method: rDoc->method still holds the HEAD value
     * set for step 1 at this point, so testing it would never select the
     * query-string form.
     */
    if (method == DPS_METHOD_GET) {
      if (PingBody[0] != '\0') {
        dps_strcat(PingURL, "?");
        dps_strcat(PingURL, PingBody);
      }
    } else {
      DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
    }
    DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
    DpsURLParse(&rDoc->CurURL, PingURL);
    DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);

    rDoc->method = method;
    DpsVarListFree(&rDoc->RequestHeaders);
    DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
    DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
    DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
    DpsDocAddServExtraHeaders(Server, rDoc);
    if (method == DPS_METHOD_POST) {
      dps_snprintf(buf, buf_size, "application/x-www-form-urlencoded; charset=%s", DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
      DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
      /* "%d" takes an int, so the length is converted explicitly. */
      dps_snprintf(buf, buf_size, "%d", (int)dps_strlen(PingBody));
      DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
    }

    DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");

    (void)DpsGetURL(Indexer, rDoc, NULL);
    DpsDocProcessResponseHeaders(Indexer, rDoc);
    DpsVarListDel(&rDoc->Sections, "body");
    DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");
    /* Clear the connection fields so freeing rDoc does not touch what Doc owns. */
    if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
    DpsDocFree(rDoc);
  }

  DpsFree(AuthPing);
  return 0;
}
#endif

/*
 * Build the "Cookie" request header for Doc.
 *
 * Indexer  - agent holding the in-memory cookie list (Indexer->Cookies)
 * Server   - server configuration; may be NULL, in which case no AuthPing
 *            exchange is attempted
 * Doc      - document being fetched; its scheme and path filter the cookies
 *            and its RequestHeaders receive the result
 * hostinfo - host part of the URL; cookies are matched on domain suffix
 *
 * Matching cookies from the in-memory list are joined as "name=value; ...".
 * If the "have_no_cookies" section is still set (default 1) and no
 * non-config cookie matched, the optional AuthPing exchange is run and the
 * "cookies" SQL table is queried for hostinfo and then for each parent
 * domain (text after each successive '.').  Rows are added to the in-memory
 * list; when a domain has no rows an empty placeholder cookie is added for
 * it.
 *
 * Without HAVE_SQL the function only emits its trace-out.
 */
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
  /* Scheme and path may be unset; compare against "" rather than NULL. */
  const char *schema = DPS_NULL2EMPTY(Doc->CurURL.schema);
  const char *path = DPS_NULL2EMPTY(Doc->CurURL.path);
#ifdef WITH_PARANOIA
  /* NOTE(review): no matching exit call is visible in this function - confirm. */
  void *paran = DpsViolationEnter(paran);
#endif
  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);

  /* Pass 1: cookies already held in memory. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    if (Coo->secure == 'y' && strcasecmp(schema, "https")) continue;
    if (strncasecmp(Coo->path, path, dps_strlen(Coo->path))) continue;
    /* Domain must equal the tail of hostinfo. */
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    /* Entries with empty name and value are not sent. */
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size)
      DpsDSTRAppend(&cookie, "; ", 2);
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }

  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows;
    int rc;

    if (Server != NULL && CookiesAuthPing(Indexer, Server, Doc, buf, sizeof(buf)) != 0) {
      /* Allocation failure: give up, releasing what was built so far. */
      DpsDSTRFree(&cookie);
      TRACE_OUT(Indexer);
      return;
    }

    /* Pass 2: stored cookies for hostinfo and each of its parent domains. */
    while (hostinfo != NULL) {
      url_id = DpsStrHash32(hostinfo);
      DpsSQLResInit(&Res);
      /*
       * NOTE(review): hostinfo is pasted into the statement unescaped; if it
       * can carry untrusted text it should go through the project's SQL
       * escaping helper - confirm with callers.
       */
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", hostinfo);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_GETLOCK(Indexer, DPS_LOCK_DB);
        db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
      } else {
        db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
      }
      if (DPS_OK == (rc = DpsSQLQuery(db, &Res, buf))) {
        rows = DpsSQLNumRows(&Res);
        for (i = 0; i < rows; i++) {
          /* Every row is added to the in-memory list, even if it does not apply to Doc. */
          DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1),
                        *DpsSQLValue(&Res, i, 3), 0, 0, 0);
          if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(schema, "https")) continue;
          if (strncasecmp(DpsSQLValue(&Res, i, 2), path, dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
          if (cookie.data_size)
            DpsDSTRAppend(&cookie, "; ", 2);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
          DpsDSTRAppend(&cookie, "=", 1);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
        }
        if (rows == 0) {
          /* Add an empty placeholder cookie for a domain with no stored rows. */
          DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
        }
      }
      DpsSQLFree(&Res);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }
      /* Step to the parent domain: the text after the next '.'. */
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }

  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }
  DpsDSTRFree(&cookie);
#endif
  TRACE_OUT(Indexer);
  return;
}