int getURL_tcl(ClientData clientData, Tcl_Interp *interp, int argc, char **argv) { if (argc >= 3) { char *listcheck = argv[1]; char *url = argv[2]; char * traceflags = argc==4 ? argv[3] : ""; if (url && (strcmp(listcheck, "listrefs")==0)) { /* Create a new robot object */ Robot *robot = Robot_new(); /* Turn on trace messages */ if (argc==4) HTSetTraceMessageMask(traceflags); /* Set up the HTML parser */ { HTList * converters = HTFormat_conversion(); HTMLInit(converters); } /* Find an anchor and load it */ robot->anchor = (HTParentAnchor *) HTAnchor_findAddress(url); HTLoadAnchor((HTAnchor *) robot->anchor, robot->request); /* Update TcL return */ Tcl_AppendResult(interp, Reference_List(robot), NULL); Robot_delete(robot); return TCL_OK; } Tcl_AppendResult(interp, bad_vars, NULL); return TCL_ERROR; } if (argc == 2) { char *url = argv[1]; if (url) { /* Create a new robot object */ Robot *robot = Robot_new(); HTChunk *chunk = 0; /* Find an anchor and load it */ HTRequest_setOutputFormat(robot->request, WWW_SOURCE); chunk = HTLoadToChunk(url, robot->request); /* Update TcL return */ Tcl_AppendResult(interp, HTChunkData(chunk), NULL); Robot_delete(robot); HTChunk_delete(chunk); return TCL_OK; } Tcl_AppendResult(interp, bad_vars, NULL); return TCL_ERROR; } Tcl_AppendResult(interp, err_string, argv[0], " [listrefs] url traceflags", NULL); return TCL_ERROR; }
PRIVATE BOOL HTRank (HTRequest * request, HTArray * variants) { HTContentDescription * cd; void ** data; if (!variants) { HTTRACE(PROT_TRACE, "Ranking..... No variants\n"); return NO; } /* ** Walk through the list of local and global preferences and find the ** overall q factor for each variant */ cd = (HTContentDescription *) HTArray_firstObject(variants, data); while (cd) { double ctq_local = type_value(cd->content_type, HTRequest_conversion(request)); double ctq_global = type_value(cd->content_type, HTFormat_conversion()); double clq_local = lang_value(cd->content_language, HTRequest_language(request)); double clq_global = lang_value(cd->content_language, HTFormat_language()); double ceq_local = encoding_value(cd->content_encoding, HTRequest_encoding(request)); double ceq_global = encoding_value(cd->content_encoding, HTFormat_contentCoding()); HTTRACE(PROT_TRACE, "Qualities... Content type: %.3f, Content language: %.3f, Content encoding: %.3f\n" _ HTMAX(ctq_local, ctq_global) _ HTMAX(clq_local, clq_global) _ HTMAX(ceq_local, ceq_global)); cd->quality *= (HTMAX(ctq_local, ctq_global) * HTMAX(clq_local, clq_global) * HTMAX(ceq_local, ceq_global)); cd = (HTContentDescription *) HTArray_nextObject(variants, data); } /* Sort the array of all our accepted preferences */ HTArray_sort(variants, VariantSort); /* Write out the result */ #ifdef HTDEBUG if (PROT_TRACE) { int cnt = 1; cd = (HTContentDescription *) HTArray_firstObject(variants, data); HTTRACE(PROT_TRACE, "Ranking.....\n"); HTTRACE(PROT_TRACE, "RANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n"); while (cd) { HTTRACE(PROT_TRACE, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n" _ cnt++ _ cd->quality _ cd->content_type ? HTAtom_name(cd->content_type) : "-" _ cd->content_language?HTAtom_name(cd->content_language):"-" _ cd->content_encoding?HTAtom_name(cd->content_encoding):"-" _ cd->filename ? cd->filename :"-"); cd = (HTContentDescription *) HTArray_nextObject(variants, data); } } #endif /* HTDEBUG */ return YES; }
/* Load one line of configuration ** ------------------------------ ** Call this, for example, to load a X resource with config info. ** Returns YES if line OK, else NO */ PUBLIC BOOL HTRule_parseLine (HTList * list, const char * config) { HTRuleOp op; char * line = NULL; char * ptr; char * word1, * word2, * word3; int status; if (!config) return NO; if ((ptr = strchr(config, '#'))) *ptr = '\0'; StrAllocCopy(line, config); /* Get our own copy */ ptr = line; HTTRACE(APP_TRACE, "Rule Parse.. `%s\'\n" _ config ? config : "<null>"); if ((word1 = HTNextField(&ptr)) == NULL) { /* Empty line */ HT_FREE(line); return YES; } if ((word2 = HTNextField(&ptr)) == NULL) { HTTRACE(APP_TRACE, "Rule Parse.. Insufficient operands: `%s\'\n" _ line); HT_FREE(line); return NO; } word3 = HTNextField(&ptr); /* Look for things we recognize */ if (!strcasecomp(word1, "addtype")) { double quality; char * encoding = HTNextField(&ptr); status = ptr ? sscanf(ptr, "%lf", &quality) : 0; HTBind_add(word2, /* suffix */ word3, /* type */ encoding ? encoding : "binary", /* encoding */ NULL, /* cte */ NULL, /* language */ status >= 1? quality : 1.0); /* quality */ } else if (!strcasecomp(word1, "addencoding")) { double quality; status = ptr ? sscanf(ptr, "%lf", &quality) : 0; HTBind_addEncoding(word2, word3, status >= 1 ? quality : 1.0); } else if (!strcasecomp(word1, "addlanguage")) { double quality; status = ptr ? sscanf(ptr, "%lf", &quality) : 0; HTBind_addLanguage(word2, word3, status >= 1 ? quality : 1.0); } else if (!strcasecomp(word1, "presentation")) { HTList * converters = HTFormat_conversion(); double quality, secs, secs_per_byte; status = ptr ? sscanf(ptr,"%lf%lf%lf",&quality,&secs,&secs_per_byte):0; HTPresentation_add(converters, word2, word3, NULL, status >= 1 ? quality : 1.0, status >= 2 ? secs : 0.0, status >= 3 ? secs_per_byte : 0.0); } else if (!strcasecomp(word1, "proxy")) { HTProxy_add(word2, word3); } else if (!strcasecomp(word1, "noproxy")) { int port = 0; status = ptr ? sscanf(ptr, "%d", &port) : 0; HTNoProxy_add(word2, word3, port); } else if (!strcasecomp(word1, "gateway")) { HTGateway_add(word2, word3); } else { op = 0==strcasecomp(word1, "map") ? HT_Map : 0==strcasecomp(word1, "pass") ? HT_Pass : 0==strcasecomp(word1, "fail") ? HT_Fail : HT_Invalid; if (op == HT_Invalid) { HTTRACE(APP_TRACE, "Rule Parse.. Bad or unknown: `%s'\n" _ config); } else HTRule_add(list, op, word2, word3); } HT_FREE(line); return YES; }
/* HTTPMakeRequest ** --------------- ** Makes a HTTP/1.0-1.1 request header. */ PRIVATE int HTTPMakeRequest (HTStream * me, HTRequest * request) { HTMethod method = HTRequest_method(request); HTRqHd request_mask = HTRequest_rqHd(request); HTParentAnchor * anchor = HTRequest_anchor(request); char * etag = HTAnchor_etag(anchor); char crlf[3]; char qstr[10]; *crlf = CR; *(crlf+1) = LF; *(crlf+2) = '\0'; /* Generate the HTTP/1.x RequestLine */ if (me->state == 0) { if (method != METHOD_INVALID) { PUTS(HTMethod_name(method)); PUTC(' '); } else PUTS("GET "); me->state++; } /* ** Generate the Request URI. If we are using full request URI then it's ** easy. Otherwise we must filter out the path part of the URI. ** In case it's a OPTIONS request then if there is no pathinfo then use ** a * instead. If we use a method different from GET or HEAD then use ** the content-location if available. */ if (me->state == 1) { char * abs_location = NULL; char * addr = HTAnchor_physical(anchor); char * location; /* JK: If the application specified a content-location (which is stored in the request in default put-name!), we use it instead of the URL that's being saved to. This is like having a user defined Content-Location */ location = HTRequest_defaultPutName (request); if (location) { if (HTURL_isAbsolute (location)) { char * relative; relative = HTRelative (location, location); abs_location = HTParse (relative + 2, addr, PARSE_ALL); HT_FREE (relative); } else abs_location = HTParse (location, addr, PARSE_ALL); addr = abs_location; } #if 0 /* ** We don't use the content-location any more as it is superseeded ** by etags and the combination of the two might do more harm than ** good (The etag is not guaranteed to be unique over multiple URIs) */ /* ** If we are using a method different from HEAD and GET then use ** the Content-Location if available, else the Request-URI. */ if (!HTMethod_isSafe(method)) { char * location = HTAnchor_location(anchor); if (location) { if (HTURL_isAbsolute(location)) addr = location; else { /* ** We have a content-location but it is relative and ** must expand it either to the content-base or to ** the Request-URI itself. */ char * base = HTAnchor_base(anchor); abs_location = HTParse(location, base, PARSE_ALL); addr = abs_location; } } } #endif /* ** If we are using a proxy or newer versions of HTTP then we can ** send the full URL. Otherwise we only send the path. */ if (HTRequest_fullURI(request)) StrAllocCopy(me->url, addr); else { me->url = HTParse(addr, "", PARSE_PATH | PARSE_PUNCTUATION); if (method == METHOD_OPTIONS) { /* ** We don't preserve the final slash or lack of same through ** out the code. This is mainly for optimization reasons ** but it gives a problem OPTIONS. We can either send a "*" ** or a "/" but not both. For now we send a "*". */ if (!strcmp(me->url, "/")) *me->url = '*'; } } HT_FREE(abs_location); me->state++; } /* ** Now send the URL that we have put together */ if (me->state == 2) { int status = HT_OK; if ((status = PUTS(me->url)) != HT_OK) return status; me->state++; #if 0 fprintf(stderr, "Requesting '%s'\n", me->url); #endif } PUTC(' '); /* ** Send out the version number. If we know it is a HTTP/1.0 server we ** are talking to then use HTTP/1.0, else use HTTP/1.1 as default version ** number */ if (me->version == HTTP_10) PUTS(HTTP_VERSION_10); else PUTS(HTTP_VERSION); PUTBLOCK(crlf, 2); /* Request Headers */ if (request_mask & HT_C_ACCEPT_TYPE) { HTFormat format = HTRequest_outputFormat(request); /* ** If caller has specified a specific output format then use this. ** Otherwise use all the registered converters to generate the ** accept header */ if (format == WWW_PRESENT) { int list; HTList *cur; BOOL first=YES; for (list=0; list<2; list++) { if ((!list && ((cur = HTFormat_conversion()) != NULL)) || (list && ((cur = HTRequest_conversion(request))!=NULL))) { HTPresentation * pres; while ((pres=(HTPresentation *) HTList_nextObject(cur))) { if (pres->rep_out==WWW_PRESENT && pres->quality<=1.0) { if (first) { PUTS("Accept: "); first=NO; } else PUTC(','); PUTS(HTAtom_name(pres->rep)); if (pres->quality < 1.0 && pres->quality >= 0.0) { sprintf(qstr, ";q=%1.1f", pres->quality); PUTS(qstr); } } } } } if (!first) PUTBLOCK(crlf, 2); } else { /* ** If we have an explicit output format then only send ** this one if not this is an internal libwww format ** of type www/<star> */ if (!HTMIMEMatch(WWW_INTERNAL, format)) { PUTS("Accept: "); PUTS(HTAtom_name(format)); PUTBLOCK(crlf, 2); } } } if (request_mask & HT_C_ACCEPT_CHAR) { int list; HTList *cur; BOOL first=YES; for (list=0; list<2; list++) { if ((!list && ((cur = HTFormat_charset()) != NULL)) || (list && ((cur = HTRequest_charset(request)) != NULL))) { HTAcceptNode *pres; while ((pres = (HTAcceptNode *) HTList_nextObject(cur))) { if (first) { PUTS("Accept-Charset: "); first=NO; } else PUTC(','); PUTS(HTAtom_name(pres->atom)); if (pres->quality < 1.0 && pres->quality >= 0.0) { sprintf(qstr, ";q=%1.1f", pres->quality); PUTS(qstr); } } } } if (!first) PUTBLOCK(crlf, 2); } if (request_mask & HT_C_ACCEPT_ENC) { int list; HTList *cur; BOOL first=YES; for (list=0; list<2; list++) { if ((!list && ((cur = HTFormat_contentCoding()) != NULL)) || (list && ((cur = HTRequest_encoding(request)) != NULL))) { HTCoding * pres; while ((pres = (HTCoding *) HTList_nextObject(cur))) { double quality = HTCoding_quality(pres); if (first) { PUTS("Accept-Encoding: "); first = NO; } else PUTC(','); PUTS(HTCoding_name(pres)); if (quality < 1.0 && quality >= 0.0) { sprintf(qstr, ";q=%1.1f", quality); PUTS(qstr); } } } } if (!first) PUTBLOCK(crlf, 2); } if (request_mask & HT_C_ACCEPT_TE) { int list; HTList *cur; BOOL first=YES; for (list=0; list<2; list++) { if ((!list && ((cur = HTFormat_transferCoding()) != NULL)) || (list && ((cur = HTRequest_transfer(request)) != NULL))) { HTCoding * pres; while ((pres = (HTCoding *) HTList_nextObject(cur))) { double quality = HTCoding_quality(pres); const char * coding = HTCoding_name(pres); if (first) { PUTS("TE: "); first = NO; } else PUTC(','); /* Special check for "chunked" which is translated to "trailers" */ if (!strcasecomp(coding, "chunked")) PUTS("trailers"); else PUTS(coding); if (quality < 1.0 && quality >= 0.0) { sprintf(qstr, ";q=%1.1f", quality); PUTS(qstr); } } } } if (!first) PUTBLOCK(crlf, 2); } if (request_mask & HT_C_ACCEPT_LAN) { int list; HTList *cur; BOOL first=YES; for (list=0; list<2; list++) { if ((!list && ((cur = HTFormat_language()) != NULL)) || (list && ((cur = HTRequest_language(request)) != NULL))) { HTAcceptNode *pres; while ((pres = (HTAcceptNode *) HTList_nextObject(cur))) { if (first) { PUTS("Accept-Language: "); first=NO; } else PUTC(','); PUTS(HTAtom_name(pres->atom)); if (pres->quality < 1.0 && pres->quality >= 0.0) { sprintf(qstr, ";q=%1.1f", pres->quality); PUTS(qstr); } } } } if (!first) PUTBLOCK(crlf, 2); } if (request_mask & HT_C_AUTH) { HTAssocList * cur = HTRequest_credentials(request); if (cur) { /* Access authentication */ HTAssoc * pres; while ((pres = (HTAssoc *) HTAssocList_nextObject(cur))) { PUTS(HTAssoc_name(pres)); PUTS(": "); PUTS(HTAssoc_value(pres)); PUTBLOCK(crlf, 2); } } } if (request_mask & HT_C_EXPECT) { HTAssocList * cur = HTRequest_expect(request); if (cur) { BOOL first=YES; HTAssoc * pres; while ((pres = (HTAssoc *) HTAssocList_nextObject(cur))) { char * value = HTAssoc_value(pres); if (first) { PUTS("Expect: "); first = NO; } else PUTC(','); /* Output the name */ PUTS(HTAssoc_name(pres)); /* Only output the value if not empty string */ if (*value) { PUTS("="); PUTS(value); } } PUTBLOCK(crlf, 2); } } if (request_mask & HT_C_FROM) { HTUserProfile * up = HTRequest_userProfile(request); const char * mailaddress = HTUserProfile_email(up); if (mailaddress) { PUTS("From: "); PUTS(mailaddress); PUTBLOCK(crlf, 2); } } if (request_mask & HT_C_HOST) { char *orig = HTAnchor_address((HTAnchor *) anchor); char *host = HTParse(orig, "", PARSE_HOST); char hostace[256]; #if 0 /* Keep the port number for HTTP/1.1 compliance */ char *ptr = strchr(host, ':'); /* Chop off port number */ if (ptr) *ptr = '\0'; #endif PUTS("Host: "); /****** still have to check UTF8toACE with port number */ if (!HTACEfromUTF8 (host, hostace, 255)) { PUTS(hostace); } else { PUTS(host); /* this may be dangerous, but helps server side debugging */ HTTRACE(PROT_TRACE, "HTTP........ Error: Cannot convert to ACE: `%s\'\n" _ host); } PUTBLOCK(crlf, 2); HT_FREE(orig); HT_FREE(host); } /* ** In the "If-*" series of headers, the ones related to etags have higher ** priority than the date relates ones. That is, if we have a etag then ** use that, otherwise use the date. First we check for range, match, and ** unmodified-since. */ if (request_mask & HT_C_IF_RANGE && etag) { PUTS("If-Range: \""); PUTS(etag); PUTC('"'); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-Range using etag `%s\'\n" _ etag); } else if (request_mask & HT_C_IF_MATCH_ANY) { PUTS("If-Match: *"); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-Match using `*\'\n"); } else if (request_mask & HT_C_IF_MATCH && etag) { PUTS("If-Match: \""); PUTS(etag); PUTC('"'); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-Match using etag `%s\'\n" _ etag); } else if (request_mask & HT_C_IF_UNMOD_SINCE) { time_t lm = HTAnchor_lastModified(anchor); if (lm > 0) { PUTS("If-Unmodified-Since: "); PUTS(HTDateTimeStr(&lm, NO)); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-Unmodified-Since `%s\'\n" _ HTDateTimeStr(&lm, NO)); } } /* ** If-None-Match and If-Modified-Since are equivalent except that the ** first uses etags and the second uses dates. Etags have precedence over ** dates. */ if (request_mask & HT_C_IF_NONE_MATCH_ANY) { PUTS("If-None-Match: *"); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-None-Match using `*\'\n"); } else if (request_mask & HT_C_IF_NONE_MATCH && etag) { PUTS("If-None-Match: \""); PUTS(etag); PUTC('"'); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-None-Match `%s\'\n" _ etag); } if (request_mask & HT_C_IMS) { time_t lm = HTAnchor_lastModified(anchor); if (lm > 0) { PUTS("If-Modified-Since: "); PUTS(HTDateTimeStr(&lm, NO)); PUTBLOCK(crlf, 2); HTTRACE(PROT_TRACE, "HTTP........ If-Modified-Since `%s\'\n" _ HTDateTimeStr(&lm, NO)); } } /* ** Max forwards is mainly for TRACE where we want to be able to stop the ** TRACE at a specific location un the message path. */ if (request_mask & HT_C_MAX_FORWARDS) { int hops = HTRequest_maxForwards(request); if (hops >= 0) { sprintf(qstr, "%d", hops); PUTS("Max-Forwards: "); PUTS(qstr); PUTBLOCK(crlf, 2); } } /* ** Range requests. For now, we only take the first entry registered for ** this request. This means that you can only send a single "unit" and ** then a set of range within this unit. This is in accordance with ** HTTP/1.1. Multiple units will go on multiple lines. */ if (request_mask & HT_C_RANGE) { HTAssocList * cur = HTRequest_range(request); if (cur) { /* Range requests */ HTAssoc * pres; while ((pres = (HTAssoc *) HTAssocList_nextObject(cur))) { PUTS("Range: "); PUTS(HTAssoc_name(pres)); /* Unit */ PUTS("="); PUTS(HTAssoc_value(pres)); /* Ranges within this unit */ PUTBLOCK(crlf, 2); } } } if (request_mask & HT_C_REFERER) { HTParentAnchor * parent_anchor = HTRequest_parent(request); if (parent_anchor) { char * act = HTAnchor_address((HTAnchor *) anchor); char * parent = HTAnchor_address((HTAnchor *) parent_anchor); #if 1 char * relative = HTRelative(parent, act); #else char * relative = HTParse(parent, act, PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION); #endif if (relative && *relative) { PUTS("Referer: "); PUTS(relative); PUTBLOCK(crlf, 2); } HT_FREE(act); HT_FREE(parent); HT_FREE(relative); } } if (request_mask & HT_C_USER_AGENT) { PUTS("User-Agent: "); PUTS(HTLib_appName()); PUTC('/'); PUTS(HTLib_appVersion()); PUTC(' '); PUTS(HTLib_name()); PUTC('/'); PUTS(HTLib_version()); PUTBLOCK(crlf, 2); } HTTRACE(PROT_TRACE, "HTTP........ Generating HTTP/1.x Request Headers\n"); return HT_OK; }