/* Determine a suitable suffix ** --------------------------- ** Use the set of bindings to find a suitable suffix (or index) ** for a certain combination of language, media type and encoding ** given in the anchor. ** ** Returns a pointer to a suitable suffix string that must be freed ** by the caller. If more than one suffix is found they are all ** concatenated using the first delimiter in HTDelimiters. ** If no suffix is found, NULL is returned. */ PUBLIC char * HTBind_getSuffix (HTParentAnchor * anchor) { int cnt; HTList * cur; HTChunk * suffix = HTChunk_new(48); char delimiter = *HTDelimiters; char * ct=NULL, * ce=NULL, * cl=NULL; HTFormat format = HTAnchor_format(anchor); HTList * encoding = HTAnchor_encoding(anchor); HTList * language = HTAnchor_language(anchor); if (!HTBindings) HTBind_init(); if (anchor) { for (cnt=0; cnt<HT_L_HASH_SIZE; cnt++) { if ((cur = HTBindings[cnt])) { HTBind *pres; while ((pres = (HTBind *) HTList_nextObject(cur))) { if (!ct && (pres->type && pres->type == format)){ ct = pres->suffix; } else if (!ce && pres->encoding && encoding) { HTList * cur_enc = encoding; HTEncoding pres_enc; while ((pres_enc = (HTEncoding) HTList_nextObject(cur_enc))) { if (pres_enc == pres->encoding) { ce = pres->suffix; break; } } } else if (!cl && pres->language && language) { HTList * cur_lang = language; HTLanguage pres_lang; while ((pres_lang = (HTLanguage) HTList_nextObject(cur_lang))) { if (pres_lang == pres->language) { cl = pres->suffix; break; } } } } } } /* Put the found suffixes together */ if (ct) { HTChunk_putc(suffix, delimiter); HTChunk_puts(suffix, ct); } if (ce) { HTChunk_putc(suffix, delimiter); HTChunk_puts(suffix, ce); } if (cl) { HTChunk_putc(suffix, delimiter); HTChunk_puts(suffix, cl); } } return HTChunk_toCString(suffix); }
/* ** This is our handle to the server reply stream when data is coming ** back from our "client" request. It is responsible for setting up the ** remaining streams in order to produce a complete HTTP output. ** If we have a HTTP 1.x response then forward untouched. */ PRIVATE int MakeReplyPipe (HTStream * me, HTRequest * client) { char * response_line = NULL; /* Generate the Response line */ { HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE); if (cbf) { HTAlertPar * reply = HTAlert_newReply(); if ((*cbf)(client, HT_A_MESSAGE, HT_MSG_NULL, NULL, HTRequest_error(client), reply)) response_line = HTAlert_replyMessage(reply); HTAlert_deleteReply(reply); } if (response_line) { PUTS(response_line); HT_FREE(response_line); } else { PUTS(HTTP_VERSION); PUTS(" 500 Internal"); PUTC(CR); PUTC(LF); } } /* ** We now have to create the rest of the response stream. We see whether ** there is a data object or not by looking at the Content Type of the ** client anchor. */ { HTParentAnchor * anchor = HTRequest_anchor(client); HTFormat format = HTAnchor_format(anchor); me->target = (format == WWW_UNKNOWN) ? HTTPResponse_new(client, me->target, YES, HTTP_11) : HTMIMERequest_new(client, HTTPResponse_new(client,me->target, NO, HTTP_11), YES); } return HT_OK; }
/* Partial Response MIME parser stream ** ----------------------------------- ** In case we sent a Range conditional GET we may get back a partial ** response. This response must be appended to the already existing ** cache entry before presented to the user. ** We do this by continuing to load the new object into a temporary ** buffer and at the same time start the cache load of the already ** existing object. When we have loaded the cache we merge the two ** buffers. */ PUBLIC HTStream * HTMIMEPartial (HTRequest * request, void * param, HTFormat input_format, HTFormat output_format, HTStream * output_stream) { #ifndef NO_CACHE HTParentAnchor * anchor = HTRequest_anchor(request); HTFormat format = HTAnchor_format(anchor); HTStream * pipe = NULL; /* ** The merge stream is a place holder for where we can put data when it ** arrives. We have two feeds: one from the cache and one from the net. ** We call the stream stack already now to get the right output stream. ** We can do this as we already know the content type from when we got the ** first part of the object. */ HTStream * merge = HTMerge(HTStreamStack(format, output_format, output_stream, request, YES), 2); /* ** Now we create the MIME parser stream in partial data mode. We also ** set the target to our merge stream. */ HTStream * me = HTMIMEConvert(request, param, input_format, output_format, output_stream); me->mode |= HT_MIME_PARTIAL; me->target = merge; #if 0 /* JK: this doesn't work because this work is repeated before */ /* ** Create the cache append stream, and a Tee stream */ { HTStream * append = HTStreamStack(WWW_CACHE_APPEND, output_format, output_stream, request, NO); if (append) me->target = HTTee(me->target, append, NULL); } #endif /* ** Create the pipe buffer stream to buffer the data that we read ** from the network */ if ((pipe = HTPipeBuffer(me->target, 0))) me->target = pipe; /* ** Now start the second load from the cache. First we read this data from ** the cache and then we flush the data that we have read from the net. */ { HTRequest * cache_request = HTRequest_new(); /* ** Set the output format to source and the output stream to the ** merge stream. As we have already set up the stream pipe, we just ** load it as source. */ HTRequest_setOutputFormat(cache_request, WWW_SOURCE); HTRequest_setOutputStream(cache_request, merge); /* ** Bind the anchor to the new request and also register a local ** AFTER filter to flush the pipe buffer so that we can get ** rest of the data through. */ HTRequest_setAnchor(cache_request, (HTAnchor *) anchor); HTRequest_addBefore(cache_request, HTCacheLoadFilter, NULL, NULL, HT_FILTER_FIRST, YES); HTRequest_addAfter(cache_request, HTCacheFlushFilter, NULL, pipe, HT_ALL, HT_FILTER_FIRST, YES); HTTRACE(STREAM_TRACE, "Partial..... Starting cache load\n"); HTLoad(cache_request, NO); } return me; #else return NULL; #endif }
PRIVATE int FileEvent (SOCKET soc, void * pVoid, HTEventType type) { file_info *file = pVoid; /* Specific access information */ int status = HT_ERROR; HTNet * net = file->net; HTRequest * request = HTNet_request(net); HTParentAnchor * anchor = HTRequest_anchor(request); /* ** Initiate a new file structure and bind to request structure ** This is actually state FILE_BEGIN, but it can't be in the state ** machine as we need the structure first. */ if (type == HTEvent_CLOSE) { /* Interrupted */ HTRequest_addError(request, ERR_FATAL, NO, HTERR_INTERRUPTED, NULL, 0, "HTLoadFile"); FileCleanup(request, HT_INTERRUPTED); return HT_OK; } /* Now jump into the machine. We know the state from the previous run */ while (1) { switch (file->state) { case FS_BEGIN: /* We only support safe (GET, HEAD, etc) methods for the moment */ if (!HTMethod_isSafe(HTRequest_method(request))) { HTRequest_addError(request, ERR_FATAL, NO, HTERR_NOT_ALLOWED, NULL, 0, "HTLoadFile"); file->state = FS_ERROR; break; } /* Check whether we have access to local disk at all */ if (HTLib_secure()) { HTTRACE(PROT_TRACE, "LoadFile.... No access to local file system\n"); file->state = FS_TRY_FTP; break; } file->local = HTWWWToLocal(HTAnchor_physical(anchor), "", HTRequest_userProfile(request)); if (!file->local) { file->state = FS_TRY_FTP; break; } /* Create a new host object and link it to the net object */ { HTHost * host = NULL; if ((host = HTHost_new("localhost", 0)) == NULL) return HT_ERROR; HTNet_setHost(net, host); if (HTHost_addNet(host, net) == HT_PENDING) { HTTRACE(PROT_TRACE, "HTLoadFile.. Pending...\n"); /* move to the hack state */ file->state = FS_PENDING; return HT_OK; } } file->state = FS_DO_CN; break; case FS_PENDING: /* ** 2000/08/10 JK : This is a funny state. Because of the ** internal libwww stacks, when doing multiple local ** requests (e.g., while using the Robot), we need to ask ** again for the host object. If we had jumped directly to ** the FS_DO_CN state, libwww would have blocked because ** of socket starvation. ** This state is similar to FS_BEGINNING, but just requests ** the host object. ** YES. THIS IS AN UGLY HACK!! */ { HTHost * host = NULL; if ((host = HTHost_new("localhost", 0)) == NULL) return HT_ERROR; HTNet_setHost(net, host); if (HTHost_addNet(host, net) == HT_PENDING) { HTTRACE(PROT_TRACE, "HTLoadFile.. Pending...\n"); file->state = FS_PENDING; return HT_OK; } } file->state = FS_DO_CN; break; case FS_DO_CN: /* ** If we have to do content negotiation then find the object that ** fits best into either what the client has indicated in the ** accept headers or what the client has registered on its own. ** The object chosen can in fact be a directory! However, content ** negotiation only makes sense if we can read the directory! ** We stat the file in order to find the size and to see it if ** exists. */ if (HTRequest_negotiation(request) && HTMethod_isSafe(HTRequest_method(request))) { char * conneg = HTMulti(request, file->local,&file->stat_info); if (conneg) { HT_FREE(file->local); file->local = conneg; HTAnchor_setPhysical(anchor, conneg); HTTRACE(PROT_TRACE, "Load File... Found `%s\'\n" _ conneg); } else { HTTRACE(PROT_TRACE, "Load File... Not found - even tried content negotiation\n"); HTRequest_addError(request, ERR_FATAL, NO, HTERR_NOT_FOUND, NULL, 0, "HTLoadFile"); file->state = FS_ERROR; break; } } else { if (HT_STAT(file->local, &file->stat_info) == -1) { HTTRACE(PROT_TRACE, "Load File... Not found `%s\'\n" _ file->local); HTRequest_addError(request, ERR_FATAL, NO, HTERR_NOT_FOUND, NULL, 0, "HTLoadFile"); file->state = FS_ERROR; break; } } /* ** Check to see if the 'localname' is in fact a directory. ** Note that we can't do a HEAD on a directory */ if (((file->stat_info.st_mode) & S_IFMT) == S_IFDIR) { if (HTRequest_method(request) == METHOD_GET) file->state = FS_PARSE_DIR; else { HTRequest_addError(request, ERR_INFO, NO, HTERR_NO_CONTENT, NULL, 0, "HTLoadFile"); file->state = FS_NO_DATA; } break; } /* ** If empty file then only serve it if it is editable. We also get ** the bindings for the file suffixes in lack of better bindings */ { BOOL editable = HTEditable(file->local, &file->stat_info); if (file_suffix_binding) HTBind_getAnchorBindings(anchor); if (editable) HTAnchor_appendAllow(anchor, METHOD_PUT); /* Set the file size */ if (file->stat_info.st_size) HTAnchor_setLength(anchor, file->stat_info.st_size); /* Set the file last modified time stamp */ if (file->stat_info.st_mtime > 0) HTAnchor_setLastModified(anchor, file->stat_info.st_mtime); /* Check to see if we can edit it */ if (!editable && !file->stat_info.st_size) { HTRequest_addError(request, ERR_INFO, NO, HTERR_NO_CONTENT, NULL, 0, "HTLoadFile"); file->state = FS_NO_DATA; } else { file->state = (HTRequest_method(request)==METHOD_GET) ? FS_NEED_OPEN_FILE : FS_GOT_DATA; } } break; case FS_NEED_OPEN_FILE: status = HTFileOpen(net, file->local, HT_FB_RDONLY); if (status == HT_OK) { /* ** Create the stream pipe FROM the channel to the application. ** The target for the input stream pipe is set up using the ** stream stack. */ { HTStream * rstream = HTStreamStack(HTAnchor_format(anchor), HTRequest_outputFormat(request), HTRequest_outputStream(request), request, YES); HTNet_setReadStream(net, rstream); HTRequest_setOutputConnected(request, YES); } /* ** Create the stream pipe TO the channel from the application ** and hook it up to the request object */ { HTOutputStream * output = HTNet_getOutput(net, NULL, 0); HTRequest_setInputStream(request, (HTStream *) output); } /* ** Set up concurrent read/write if this request isn't the ** source for a PUT or POST. As source we don't start reading ** before all destinations are ready. If destination then ** register the input stream and get ready for read */ if (HTRequest_isSource(request) && !HTRequest_destinationsReady(request)) return HT_OK; HTRequest_addError(request, ERR_INFO, NO, HTERR_OK, NULL, 0, "HTLoadFile"); file->state = FS_NEED_BODY; /* If we are _not_ using preemptive mode and we are Unix fd's ** then return here to get the same effect as when we are ** connecting to a socket. That way, HTFile acts just like any ** other protocol module even though we are in fact doing ** blocking connect */ if (HTEvent_isCallbacksRegistered()) { if (!HTRequest_preemptive(request)) { if (!HTNet_preemptive(net)) { HTTRACE(PROT_TRACE, "HTLoadFile.. Returning\n"); HTHost_register(HTNet_host(net), net, HTEvent_READ); } else if (!file->timer) { HTTRACE(PROT_TRACE, "HTLoadFile.. Returning\n"); file->timer = HTTimer_new(NULL, ReturnEvent, file, 1, YES, NO); } return HT_OK; } } } else if (status == HT_WOULD_BLOCK || status == HT_PENDING) return HT_OK; else { HTRequest_addError(request, ERR_INFO, NO, HTERR_INTERNAL, NULL, 0, "HTLoadFile"); file->state = FS_ERROR; /* Error or interrupt */ } break; case FS_NEED_BODY: status = HTHost_read(HTNet_host(net), net); if (status == HT_WOULD_BLOCK) return HT_OK; else if (status == HT_LOADED || status == HT_CLOSED) { file->state = FS_GOT_DATA; } else { HTRequest_addError(request, ERR_INFO, NO, HTERR_FORBIDDEN, NULL, 0, "HTLoadFile"); file->state = FS_ERROR; } break; case FS_PARSE_DIR: status = HTFile_readDir(request, file); if (status == HT_LOADED) file->state = FS_GOT_DATA; else file->state = FS_ERROR; break; case FS_TRY_FTP: { char *url = HTAnchor_physical(anchor); HTAnchor *anchor; char *newname = NULL; StrAllocCopy(newname, "ftp:"); if (!strncmp(url, "file:", 5)) StrAllocCat(newname, url+5); else StrAllocCat(newname, url); anchor = HTAnchor_findAddress(newname); HTRequest_setAnchor(request, anchor); HT_FREE(newname); FileCleanup(request, HT_IGNORE); return HTLoad(request, YES); } break; case FS_GOT_DATA: FileCleanup(request, HT_LOADED); return HT_OK; break; case FS_NO_DATA: FileCleanup(request, HT_NO_DATA); return HT_OK; break; case FS_RETRY: FileCleanup(request, HT_RETRY); return HT_OK; break; case FS_ERROR: FileCleanup(request, HT_ERROR); return HT_OK; break; } } /* End of while(1) */ }
PRIVATE void HTML_start_element (HTStructured * me, int element_number, const BOOL * present, const char ** value) { HTChildAnchor * address = NULL; if (!me->started) { HTextImp_build(me->text, HTEXT_BEGIN); me->started = YES; } /* Look at what element was started */ switch (element_number) { case HTML_A: if (present[HTML_A_HREF] && value[HTML_A_HREF]) { address = HTAnchor_findChildAndLink( me->node_anchor, /* parent */ present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */ value[HTML_A_HREF], /* Addresss */ present[HTML_A_REL] && value[HTML_A_REL] ? (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL); if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) { HTLink * link = HTAnchor_mainLink((HTAnchor *) address); HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link)); if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]); } HTextImp_foundLink(me->text, element_number, HTML_A_HREF, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]); } break; case HTML_AREA: if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_AREA_HREF], NULL); HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]); } break; case HTML_BASE: if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) { HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]); HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]); } break; case HTML_BODY: if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_BODY_BACKGROUND], NULL); HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]); } break; case HTML_FORM: if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_FORM_ACTION], NULL); HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION, address, present, value); } break; case HTML_FRAME: if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_FRAME_SRC], NULL); HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]); } break; case HTML_INPUT: if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_INPUT_SRC], NULL); HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC, address, present, value); } break; case HTML_IMG: if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_IMG_SRC], NULL); HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC, address, present, value); } break; case HTML_ISINDEX: HTAnchor_setIndex(me->node_anchor); break; case HTML_LINK: if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) { HTParentAnchor * dest = NULL; address = HTAnchor_findChildAndLink( me->node_anchor, /* parent */ present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */ present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL, /* Addresss */ NULL); /* Rels */ dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address)); /* If forward reference */ if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) { char * strval = NULL; char * ptr = NULL; char * relation = NULL; StrAllocCopy(strval, value[HTML_LINK_REL]); ptr = strval; while ((relation = HTNextLWSToken(&ptr)) != NULL) { HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest, (HTLinkType) HTAtom_caseFor(relation), METHOD_INVALID); } HT_FREE(strval); } /* If reverse reference */ if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) { char * strval = NULL; char * ptr = NULL; char * relation = NULL; StrAllocCopy(strval, value[HTML_LINK_REV]); ptr = strval; while ((relation = HTNextLWSToken(&ptr)) != NULL) { HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor, (HTLinkType) HTAtom_caseFor(relation), METHOD_INVALID); } HT_FREE(strval); } /* If we got any type information as well */ if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) { if (HTAnchor_format(dest) == WWW_UNKNOWN) HTAnchor_setFormat(dest, (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE])); } /* Call out to the layout engine */ HTextImp_foundLink(me->text, element_number, HTML_LINK_HREF, address, present, value); } break; case HTML_META: if (present[HTML_META_NAME] && value[HTML_META_NAME]) { HTAnchor_addMeta (me->node_anchor, value[HTML_META_NAME], (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ? value[HTML_META_CONTENT] : ""); } break; case HTML_OBJECT: if (present[HTML_OBJECT_CLASSID] && value[HTML_OBJECT_CLASSID]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_OBJECT_CLASSID], NULL); HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CLASSID, address, present, value); } if (present[HTML_OBJECT_CODEBASE] && value[HTML_OBJECT_CODEBASE]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_OBJECT_CODEBASE], NULL); HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CODEBASE, address, present, value); } if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_OBJECT_DATA], NULL); HTextImp_foundLink(me->text, element_number, HTML_OBJECT_DATA, address, present, value); } if (present[HTML_OBJECT_ARCHIVE] && value[HTML_OBJECT_ARCHIVE]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_OBJECT_ARCHIVE], NULL); HTextImp_foundLink(me->text, element_number, HTML_OBJECT_ARCHIVE, address, present, value); } if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_OBJECT_USEMAP], NULL); HTextImp_foundLink(me->text, element_number, HTML_OBJECT_USEMAP, address, present, value); } break; case HTML_PRE: if (me->comment_end) HTextImp_addText(me->text, me->comment_end, strlen(me->comment_end)); break; case HTML_TITLE: HTChunk_truncate(me->title,0); break; } /* Update our parse stack */ if (SGML_findTagContents(me->dtd, element_number) != SGML_EMPTY) { if (me->sp == me->stack) { HTTRACE(SGML_TRACE, "HTML Parser. Maximum nesting of %d exceded!\n" _ MAX_NESTING); me->overflow++; return; } --(me->sp); me->sp[0] = element_number; } /* Call out to the layout engine */ HTextImp_beginElement(me->text, element_number, present, value); }