/*
 * Release everything owned by a text list: the three strings of every
 * stored item (str, href, section_name) and then the item array itself.
 * The list counters are reset to zero, so the list is left empty.
 */
__C_LINK void __DPSCALL DpsTextListFree(DPS_TEXTLIST *tlist){
  size_t idx = 0;
#ifdef WITH_PARANOIA
  void * paran = DpsViolationEnter(paran);
#endif

#ifdef DEBUG_MEM
  /* In DEBUG_MEM builds make the array readable and writable again before touching it. */
  if (tlist->mitems) {
    mprotect(tlist->Items, tlist->mitems * sizeof(DPS_TEXTITEM), PROT_READ | PROT_WRITE);
  }
#endif

  while (idx < tlist->nitems) {
    DPS_TEXTITEM *entry = &tlist->Items[idx];

    DPS_FREE(entry->str);
    DPS_FREE(entry->href);
    DPS_FREE(entry->section_name);
    idx++;
  }

  DPS_FREE(tlist->Items);
  tlist->mitems = 0;
  tlist->nitems = 0;

#ifdef WITH_PARANOIA
  DpsViolationExit(-1, paran);
#endif
  return;
}
/*
 * Append a copy of *item to the text list.
 *
 * The str, href and section_name strings are duplicated, the scalar
 * fields (section, strict, len) are copied and `marked` is reset to 0.
 * An item whose str is NULL is silently ignored.
 *
 * The item array grows in steps of DPS_TEXTLIST_PAS entries.  If the
 * reallocation fails the list is left empty (nitems == mitems == 0) and
 * the item is not added.
 */
__C_LINK void __DPSCALL DpsTextListAdd(DPS_TEXTLIST * tlist,const DPS_TEXTITEM *item){
  DPS_TEXTITEM *slot;
#ifdef WITH_PARANOIA
  void * paran = DpsViolationEnter(paran);
#endif

  if(!item->str) {
#ifdef WITH_PARANOIA
    DpsViolationExit(-1, paran);
#endif
    return;
  }

#ifdef DEBUG_MEM
  /* The used part of the array is kept read-only between calls; unlock it. */
  if (tlist->mitems) {
    mprotect(tlist->Items, tlist->nitems * sizeof(DPS_TEXTITEM), PROT_READ | PROT_WRITE);
  }
#endif

  if (tlist->nitems + 1 > tlist->mitems) {
    tlist->mitems += DPS_TEXTLIST_PAS;
    tlist->Items = (DPS_TEXTITEM*)DpsRealloc(tlist->Items, (tlist->mitems) * sizeof(DPS_TEXTITEM) + 4096);
    if (tlist->Items == NULL) {
      tlist->nitems = tlist->mitems = 0;
      /* This exit path used to skip the paranoia epilogue that every other path runs. */
#ifdef WITH_PARANOIA
      DpsViolationExit(-1, paran);
#endif
      return;
    }
  }

  slot = &tlist->Items[tlist->nitems];
  slot->str = (char*)DpsStrdup(item->str);
  slot->href = (item->href != NULL) ? (char*)DpsStrdup(item->href) : NULL;
  slot->section_name = (item->section_name != NULL) ? (char*)DpsStrdup(item->section_name) : NULL;
  slot->section = item->section;
  slot->strict = item->strict;
  slot->len = item->len;
  slot->marked = 0;
  tlist->nitems++;

#ifdef DEBUG_MEM
  /* Re-protect the used part of the array against stray writes. */
  if (tlist->mitems) {
    mprotect(tlist->Items, tlist->nitems * sizeof(DPS_TEXTITEM), PROT_READ);
  }
#endif
#ifdef WITH_PARANOIA
  DpsViolationExit(-1, paran);
#endif
  return;
}
int _DpsURLParse(DPS_URL *url, const char *str, const char *filename, int line) { #else int DpsURLParse(DPS_URL *url, const char *str) { #endif char *schema,*anchor,*file,*query; char *s; /* size_t len = dps_strlen(str);*/ #ifdef WITH_PARANOIA void * paran = DpsViolationEnter(paran); #endif #ifdef DEBUG_URL fprintf(stderr, " -- %s:%d Parser url: %s\n", filename, line, str); #endif DPS_FREE(url->schema); DPS_FREE(url->specific); DPS_FREE(url->hostinfo); DPS_FREE(url->hostname); DPS_FREE(url->anchor); DPS_FREE(url->auth); url->port=0; url->default_port=0; DPS_FREE(url->path); DPS_FREE(url->directory); DPS_FREE(url->filename); DPS_FREE(url->query_string); /* if(len >= DPS_URLSIZE)return(DPS_URL_LONG); FIXME: Chage this cheking for configured parameter, not DPS_URLSIZE */ s = (char*)DpsStrdup(str); if (s == NULL) { #ifdef WITH_PARANOIA DpsViolationExit(-1, paran); #endif return DPS_ERROR; } url->len = dps_strlen(str); /* Find possible schema end than */ /* Check that it is really schema */ /* It must consist of alphas only */ /* We will take in account digits */ /* also for oracle8:// for example */ /* We must check it because */ /* It might be anchor also */ /* For example: */ /* "mod/index.html#a:1" */ if((schema=strchr(s,':'))){ const char * ch; for(ch=s;ch<schema;ch++){ if(!isalnum(*ch)){ /* Bad character */ /* so it is not schema */ schema=0;break; } } } if(schema){ /* Have scheme - absolute path */ *schema=0; url->schema = (char*)DpsStrdup(s); url->specific = (char*)DpsStrdup(schema + 1); *schema=':'; if(!strcasecmp(url->schema,"http"))url->default_port=80; else if(!strcasecmp(url->schema,"https"))url->default_port=443; else if(!strcasecmp(url->schema,"nntp"))url->default_port=119; else if(!strcasecmp(url->schema,"news"))url->default_port=119; else if(!strcasecmp(url->schema,"ftp"))url->default_port=21; if(!strncmp(url->specific,"//",2)){ char *ss,*hostname; /* Have hostinfo */ if((ss=strchr(url->specific+2,'/'))){ /* Have hostname with path */ *ss=0; 
url->hostinfo = (char*)DpsStrdup(url->specific + 2); *ss='/'; url->path = (char*)DpsStrdup(ss); }else{ /* Hostname without path */ if ((ss = strchr(url->specific + 2, '?'))) { /* Have hostname with parameters */ *ss = 0; url->hostinfo = (char*)DpsStrdup(url->specific + 2); *ss='?'; url->path = (char*)DpsStrdup("/"); }else { url->hostinfo = (char*)DpsStrdup(url->specific + 2); url->path = (char*)DpsStrdup("/"); } } if((hostname=strchr(url->hostinfo,'@'))){ /* Username and password is given */ /* Store auth string user:password */ *hostname=0; url->auth = (char*)DpsStrdup(url->hostinfo); *hostname='@'; hostname++; }else{ hostname = url->hostinfo; } /* FIXME: for(h=hostname;*h;h++){ if( *h>='A' && *h<='Z') *h=(*h)-'A'+'a'; } */ if((ss=strchr(hostname,':'))){ *ss=0; url->hostname = (char*)DpsStrdup(hostname); *ss=':'; url->port=atoi(ss+1); }else{ url->hostname = (char*)DpsStrdup(hostname); url->port=0; } }else{ /* Have not host but have schema */ /* This is possible for: */ /* file: mailto: htdb: news: */ /* As far as we do not need mailto: just ignore it */ if(!strcasecmp(url->schema,"mailto") || !strcasecmp(url->schema,"javascript") || !strcasecmp(url->schema,"feed") ) { DPS_FREE(s); #ifdef WITH_PARANOIA DpsViolationExit(-1, paran); #endif return(DPS_URL_BAD); } else if(!strcasecmp(url->schema,"file")) url->path = (char*)DpsStrdup(url->specific); else if(!strcasecmp(url->schema,"exec")) url->path = (char*)DpsStrdup(url->specific); else if(!strcasecmp(url->schema,"cgi")) url->path = (char*)DpsStrdup(url->specific); else if(!strcasecmp(url->schema,"htdb")) url->path = (char*)DpsStrdup(url->specific); else if(!strcasecmp(url->schema,"news")){ /* Now we will use localhost as NNTP */ /* server as it is not indicated in URL */ url->hostname = (char*)DpsStrdup("localhost"); url->path = (char*)DpsMalloc(dps_strlen(url->specific) + 2); if (url->path == NULL) { DPS_FREE(s); #ifdef WITH_PARANOIA DpsViolationExit(-1, paran); #endif return DPS_ERROR; } 
sprintf(url->path,"/%s",url->specific); url->default_port=119; }else{ /* Unknown strange schema */ DPS_FREE(s); #ifdef WITH_PARANOIA DpsViolationExit(-1, paran); #endif return(DPS_URL_BAD); } } }else{ url->path = (char*)DpsStrdup(s); } /* Cat an anchor if exist */ if((anchor=strstr(url->path,"#")))*anchor=0; /* If path is not full just copy it to filename */ /* i.e. neither /usr/local/ nor c:/windows/temp/ */ if((url->path != NULL) && (url->path[0]!='/') && (url->path[0]!='?') && (url->path[1]!=':')) { /* Relative path */ if(!strncmp(url->path,"./",2)) url->filename = (char*)DpsStrdup(url->path + 2); else url->filename = (char*)DpsStrdup(url->path); url->path[0] = 0; } /* truncate path to query_string */ /* and store query_string */ if((query=strrchr(url->path,'?'))){ url->query_string = (char*)DpsStrdup(query); *(query) = 0; } DpsURLNormalizePath(url->path); /* Now find right '/' sign and copy the rest to filename */ if((file=strrchr(url->path,'/'))&&(strcmp(file,"/"))){ url->filename = (char*)DpsStrdup(file + 1); *(file+1)=0; } /* Now find right '/' sign and copy the rest to directory */ if ((file = strrchr(url->path,'/'))) { char *p_save = file; for(file--; (file > url->path) && (*file != '/'); file--); file++; if (*file) { *p_save = '\0'; url->directory = (char*)DpsStrdup(file); *p_save = '/'; } } DPS_FREE(s); if (url->hostname != NULL) { DpsRTrim(url->hostname, "."); url->domain_level = 1; for (s = url->hostname; *s; s++) { *s = dps_tolower(*s); if (*s == '.') url->domain_level++; if (strchr(",'\";", (int)*s)) { #ifdef WITH_PARANOIA DpsViolationExit(-1, paran); #endif return DPS_URL_BAD; } } } if (url->hostinfo != NULL) { DpsRTrim(url->hostinfo, "."); s = strchr(url->hostinfo, '@'); for (s = (s == NULL) ? 
url->hostinfo : s + 1; *s; s++) *s = dps_tolower(*s); } if (url->schema != NULL) for (s = url->schema; *s; s++) *s = dps_tolower(*s); /* fprintf(stderr, "url: .path: %s port:%d\n", url->path, url->port);*/ #ifdef WITH_PARANOIA DpsViolationExit(-1, paran); #endif return DPS_OK; }
#ifdef HAVE_SQL
/*
 * Return a newly allocated, SQL-escaped copy of src (NULL is treated as
 * ""), or NULL if the allocation fails.  The caller frees the result.
 * The buffer is sized 2*len+1, the same ratio DpsCookiesAdd uses for its
 * fixed path_esc buffer.
 */
static char *DpsCookieEscape(DPS_DB *db, const char *src) {
  const char *from = DPS_NULL2EMPTY(src);
  size_t len = dps_strlen(from);
  char *to = (char*)DpsMalloc(2 * len + 1);

  if (to != NULL) (void)DpsDBEscStr(db, to, from, len);
  return to;
}
#endif

/*
 * Store a cookie in the agent's in-memory cookie list and, when
 * insert_flag is non-zero, in the `cookies` SQL table as well.
 *
 * An existing cookie with the same domain, path, name (all compared
 * case-insensitively) and secure flag gets its value replaced; otherwise
 * a new entry is appended.  A NULL path is stored as "".
 *
 * domain, name and value are escaped before being placed into SQL text,
 * the same way path already was: cookie data comes from remote servers.
 *
 * Returns DPS_OK on success, or when no database is configured (nothing
 * is stored then), and DPS_ERROR on allocation failure.  Without
 * HAVE_SQL the function does nothing and returns DPS_OK.
 */
int DpsCookiesAdd(DPS_AGENT *Indexer, const char *domain, const char * path, const char *name, const char *value, const char secure, dps_uint4 expires, const char from_config, int insert_flag) {
#ifdef HAVE_SQL
  char buf[3*PATH_MAX];
  char path_esc[2*PATH_MAX+1];
  char *domain_esc = NULL, *name_esc = NULL, *value_esc = NULL;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  DPS_DB *db;
  dpshash32_t url_id = DpsStrHash32(domain);
  size_t i;
  int rc = DPS_OK;
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  /* Pick the database by domain hash; nothing to do if none is configured. */
  if (Indexer->flags & DPS_FLAG_UNOCON) {
    if (Indexer->Conf->dbl.nitems == 0) {
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
    DPS_GETLOCK(Indexer, DPS_LOCK_DB);
    db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
  } else {
    if (Indexer->dbl.nitems == 0) {
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
    db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
  }

  (void)DpsDBEscStr(db, path_esc, DPS_NULL2EMPTY(path), dps_min(PATH_MAX,dps_strlen(DPS_NULL2EMPTY(path))));

  /* The escaped copies are only needed when SQL statements will be issued. */
  if (insert_flag) {
    domain_esc = DpsCookieEscape(db, domain);
    name_esc = DpsCookieEscape(db, name);
    value_esc = DpsCookieEscape(db, value);
    if (domain_esc == NULL || name_esc == NULL || value_esc == NULL) {
      rc = DPS_ERROR;
      goto done;
    }
  }

  /* Update in place if this cookie is already known. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    if (!strcasecmp(Coo->domain, domain)
        && !strcasecmp(Coo->path, DPS_NULL2EMPTY(path))
        && !strcasecmp(Coo->name, name)
        && (Coo->secure == secure)) {
      DPS_FREE(Coo->value);
      Coo->value = DpsStrdup(value);
      if (insert_flag) {
        dps_snprintf(buf, sizeof(buf), "UPDATE cookies SET value='%s',expires=%d WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
                     value_esc, expires, domain_esc, path_esc, name_esc, secure);
        DpsSQLAsyncQuery(db, NULL, buf);
      }
      goto done;
    }
  }

  /* Not found: append a new entry. */
  Cookies->Cookie = (DPS_COOKIE*)DpsRealloc(Cookies->Cookie, (Cookies->ncookies + 1) * sizeof(DPS_COOKIE));
  if (Cookies->Cookie == NULL) {
    Cookies->ncookies = 0;
    rc = DPS_ERROR;
    goto done;
  }
  Coo = &Cookies->Cookie[Cookies->ncookies];
  Coo->secure = secure;
  Coo->from_config = from_config;
  Coo->domain = DpsStrdup(domain);
  /* Store "" rather than NULL: the stored path is later passed to strcasecmp(). */
  Coo->path = DpsStrdup(DPS_NULL2EMPTY(path));
  Coo->name = DpsStrdup(name);
  Coo->value = DpsStrdup(value);

  if (insert_flag) {
    if (Indexer->Flags.CheckInsertSQL) {
      dps_snprintf(buf, sizeof(buf), "DELETE FROM cookies WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
                   domain_esc, path_esc, name_esc, secure);
      DpsSQLAsyncQuery(db, NULL, buf);
    }
    dps_snprintf(buf, sizeof(buf), "INSERT INTO cookies(expires,secure,domain,path,name,value)VALUES(%d,'%c','%s','%s','%s','%s')",
                 expires, secure, domain_esc, path_esc, name_esc, value_esc);
    DpsSQLAsyncQuery(db, NULL, buf);
  }
  Cookies->ncookies++;

done:
  DPS_FREE(domain_esc);
  DPS_FREE(name_esc);
  DPS_FREE(value_esc);
  if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif
  return rc;
#else
  return DPS_OK;
#endif /*HAVE_SQL*/
}
#ifdef HAVE_SQL
/*
 * Perform the "AuthPing" login sequence configured for Server.
 *
 * PingData is the raw AuthPing value: base64 text that decodes to
 * "GET <url> <params>" or "POST <url> <params>".  First a HEAD request is
 * sent to the home page of Doc's host, then the ping request itself; in
 * both cases only the response headers are processed (cookies are wanted
 * from them).  For GET the params are appended to the URL after '?', for
 * POST they are sent as an urlencoded body.
 *
 * Doc must be non-NULL.  PingData is trimmed in place.
 * Returns DPS_OK, or DPS_ERROR if a required allocation failed.
 */
static int DpsCookiesAuthPing(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, char *PingData) {
  char buf[2*PATH_MAX];
  char *AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));
  int method = DPS_METHOD_GET;

  if (AuthPing == NULL) return DPS_ERROR;

  /* The decoded text overwrites the copy; from here on PingData points into AuthPing. */
  dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));
  if (!strncasecmp(AuthPing, "GET", 3)) {
    method = DPS_METHOD_GET;
    PingData = DpsTrim(AuthPing + 3, " \t\r\n");
  } else if (!strncasecmp(AuthPing, "POST", 4)) {
    method = DPS_METHOD_POST;
    PingData = DpsTrim(AuthPing + 4, " \t\r\n");
  } else {
    DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
    PingData = NULL;
  }

  if (PingData != NULL) {
    size_t size = dps_strlen(PingData);
    /* Both tokens come out of PingData, so URL + '?' + body + NUL fits into
       size + 2 bytes.  PingBody gets size + 1 so it is never zero-length. */
    char PingURL[size + 2];
    char PingBody[size + 1];
    DPS_DOCUMENT *rDoc;

    /* sscanf() may fill fewer than two fields; start from empty strings. */
    PingURL[0] = '\0';
    PingBody[0] = '\0';

    rDoc = DpsDocInit(NULL);
    if (rDoc == NULL) {
      DpsFree(AuthPing);
      return DPS_ERROR;
    }
    DpsSpiderParamInit(&rDoc->Spider);
    DpsVarList2Doc(rDoc, Server);
    rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
    rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
    if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
      DpsDocFree(rDoc);
      DpsFree(AuthPing);
      return DPS_ERROR;
    }
    rDoc->Buf.buf[0] = '\0';
    rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

    /* Step 1: HEAD request to the home page of the host. */
    dps_snprintf(buf, sizeof(buf), "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
    DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
    DpsURLParse(&rDoc->CurURL, buf);
    DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
    rDoc->method = DPS_METHOD_HEAD;
    DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*");
    DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
    DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
    DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
    DpsDocAddServExtraHeaders(Server, rDoc);
    DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
    if (Indexer->Flags.cmd == DPS_IND_FILTER) {
      DpsDocLookupConn(Indexer, rDoc);
    } else {
      /* Borrow the connection of the document being fetched. */
      DPS_FREE(rDoc->connp.connp);
      rDoc->connp = Doc->connp;
    }
    (void)DpsGetURL(Indexer, rDoc, NULL); /* Only the headers of the home page matter: cookies are wanted from them */
    DpsDocProcessResponseHeaders(Indexer, rDoc);
    DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");

    /* Step 2: the ping request itself, on the connection set up in step 1. */
    sscanf(PingData, "%s %s", PingURL, PingBody);
    /* Decide on the parsed method; rDoc->method still holds HEAD from step 1. */
    if (method == DPS_METHOD_GET) {
      if (PingBody[0] != '\0') {
        dps_strcat(PingURL, "?");
        dps_strcat(PingURL, PingBody);
      }
    } else {
      DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
    }
    DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
    DpsURLParse(&rDoc->CurURL, PingURL);
    DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);
    rDoc->method = method;
    DpsVarListFree(&rDoc->RequestHeaders);
    DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
    DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
    DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
    DpsDocAddServExtraHeaders(Server, rDoc);
    if (method == DPS_METHOD_POST) {
      dps_snprintf(buf, sizeof(buf), "application/x-www-form-urlencoded; charset=%s", DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
      DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
      /* %d expects an int, not a size_t */
      dps_snprintf(buf, sizeof(buf), "%d", (int)dps_strlen(PingBody));
      DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
    }
    DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");
    (void)DpsGetURL(Indexer, rDoc, NULL); /* Only the cookies in the response headers matter */
    DpsDocProcessResponseHeaders(Indexer, rDoc);
    DpsVarListDel(&rDoc->Sections, "body");
    DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");

    /* Clear connp before freeing rDoc; it may be the copy taken from Doc. */
    bzero(&rDoc->connp, sizeof(rDoc->connp));
    DpsDocFree(rDoc);
  }

  DpsFree(AuthPing);
  return DPS_OK;
}
#endif

/*
 * Build the "Cookie" request header of Doc for the host `hostinfo`.
 *
 * Cached cookies are matched first: the cookie domain must be a
 * case-insensitive suffix of hostinfo, the cookie path a prefix of the
 * document path, and secure ('y') cookies are only used for https.
 *
 * If the document's "have_no_cookies" section is still set afterwards
 * (it is cleared as soon as a matching cached cookie did not come from
 * the configuration), the server's AuthPing sequence is run when
 * configured, and the `cookies` table is queried for hostinfo and for
 * each parent domain obtained by stripping leading labels.  Rows found
 * are added to the cache and, when they match, to the header; a domain
 * with no rows gets an empty placeholder cookie in the cache.
 *
 * NOTE(review): hostinfo is placed into the SELECT statement unescaped.
 *
 * Without HAVE_SQL the function does nothing.
 */
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);

  /* Pass 1: cookies already cached in memory. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    if (Coo->secure == 'y' && strcasecmp(DPS_NULL2EMPTY(Doc->CurURL.schema), "https")) continue;
    if (strncasecmp(Coo->path, DPS_NULL2EMPTY(Doc->CurURL.path), dps_strlen(Coo->path))) continue;
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    /* An entry with empty name and value carries nothing to send. */
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }

  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows;
    int rc;

    if (Server != NULL) {
      char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);
      if (PingData != NULL) {
        if (DpsCookiesAuthPing(Indexer, Server, Doc, PingData) != DPS_OK) {
          /* Allocation failure: give up, but release what is held here. */
          DpsDSTRFree(&cookie);
          TRACE_OUT(Indexer);
#ifdef WITH_PARANOIA
          DpsViolationExit(Indexer->handle, paran);
#endif
          return;
        }
      }
    }

    /* Pass 2: the cookies table, for hostinfo and then each parent domain. */
    while (hostinfo != NULL) {
      /* With no database configured the modulo below would divide by zero. */
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        if (Indexer->Conf->dbl.nitems == 0) break;
      } else {
        if (Indexer->dbl.nitems == 0) break;
      }

      url_id = DpsStrHash32(hostinfo);
      DpsSQLResInit(&Res);
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", hostinfo);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_GETLOCK(Indexer, DPS_LOCK_DB);
        db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
      } else {
        db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
      }
      if (DPS_OK == (rc = DpsSQLQuery(db, &Res, buf))) {
        rows = DpsSQLNumRows(&Res);
        for (i = 0; i < rows; i++) {
          /* Cache the row (insert_flag 0: it is not written back to SQL). */
          DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1), *DpsSQLValue(&Res, i, 3), 0, 0, 0);
          if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(DPS_NULL2EMPTY(Doc->CurURL.schema), "https")) continue;
          if (strncasecmp(DpsSQLValue(&Res, i, 2), DPS_NULL2EMPTY(Doc->CurURL.path), dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
          if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
          DpsDSTRAppend(&cookie, "=", 1);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
        }
        if (rows == 0) {
          /* Cache an empty placeholder cookie for this domain. */
          DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
        }
      }
      DpsSQLFree(&Res);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }

      /* Move on to the parent domain: drop everything up to the next dot. */
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }

  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }
  DpsDSTRFree(&cookie);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif
#endif
  TRACE_OUT(Indexer);
  return;
}