MODULE_STATIC int mod_config(char *config, int i) { char *p = config; WRLOCK_LANG_CONFIG ; while( *p && IS_SPACE(*p) ) p++; if ( !strncasecmp(p, "CharsetAgent", 12) ) { char *ptr, *agent, *t; struct charset *cs = NULL; p+=12; t = p; while( (agent = (char*)strtok_r(t, " ", &ptr)) ) { /* t is not NULL only on first item which must be charset name. */ /* there was when we add charset name as agentname on second string */ /* Fixed by Peter S. Voronov */ if ( t && !cs && !(cs = lookup_charset_by_name(charsets, agent))) { cs = add_new_charset(&charsets, agent); if ( !cs ) { verb_printf("Can't create charset\n"); goto done; } t = NULL ; continue; } if ( cs && !t) add_to_string_list(&cs->CharsetAgent, agent); t = NULL ; } if ( cs ) { struct string_list *list = cs->CharsetAgent; while(list) { verb_printf("Agent: %s\n", list->string); list = list->next; } } } else if ( !strncasecmp(p, "default_charset", 15) ) { p += 15; while (*p && IS_SPACE(*p) ) p++; strncpy(default_charset, p, sizeof(default_charset)-1); } else if ( !strncasecmp(p, "CharsetRecodeTable", 18) ) { char charset[80], path[MAXPATHLEN]; struct charset *cs; FILE *Tf; if ( sscanf(p+18, " %80s %128s", (char*)&charset, (char*)&path) == 2 ) { verb_printf("<<recodetable for %s: %s>>\n", charset, path); if ( !(cs=lookup_charset_by_name(charsets, charset)) ) { cs = add_new_charset(&charsets, charset); if ( !cs ) { verb_printf("Can't create charset\n"); goto done; } } /* load table */ if ( cs->Table ) xfree(cs->Table); cs->Table = malloc(128); if ( cs->Table ) { int i; for(i=0;i<128;i++) cs->Table[i] = i+128 ; } Tf = fopen(path, "r"); if ( Tf ) { int f, t; while( !feof(Tf) ) { char buf[80]; buf[0] = 0; fgets(buf, sizeof(buf), Tf); if ( sscanf(buf, "%x%x", &f, &t) == 2 ) { if ( f >= 128 ) cs->Table[((unsigned)f & 0xff)-128] = (unsigned) t; } } fclose(Tf); } else verb_printf("Can't open %m: \n"); } } done: UNLOCK_LANG_CONFIG ; return(MOD_CODE_OK); }
#ifdef CONFIG_ECMASCRIPT
/* Check the value of a <script> type attribute.
 *
 * Accepted are "text/" or "application/" followed by either "javascript"
 * (optionally continued by something starting with a digit) or exactly one
 * of the aliases listed below; all comparisons ignore case.
 * Ref:
 * http://www.ietf.org/internet-drafts/draft-hoehrmann-script-types-03.txt
 *
 * Returns non-zero if the type is accepted, zero otherwise. */
static int
script_type_is_ecmascript(unsigned char *type)
{
	static const char *const aliases[] = {
		"ecmascript",
		"jscript",
		"livescript",
		"x-javascript",
		"x-ecmascript",
	};
	unsigned char *subtype;
	int i;

	if (!c_strncasecmp((const char *)type, "text/", 5))
		subtype = type + 5;
	else if (!c_strncasecmp((const char *)type, "application/", 12))
		subtype = type + 12;
	else
		return 0;

	if (!c_strncasecmp((const char *)subtype, "javascript", 10)) {
		int len = strlen((const char *)subtype);

		/* Anything trailing "javascript" has to begin with a digit. */
		return len <= 10 || isdigit(subtype[10]);
	}

	for (i = 0; i < (int) (sizeof(aliases) / sizeof(aliases[0])); i++) {
		if (!c_strcasecmp((const char *)subtype, aliases[i]))
			return 1;
	}

	return 0;
}

/* Check the value of a <script> language attribute: it has to be
 * "JavaScript" (any case), optionally followed by something starting with
 * a digit (like ``JavaScript1.1''). The attribute is deprecated in favor of
 * type by HTML 4.01.
 *
 * Returns non-zero if the language is accepted, zero otherwise. */
static int
script_language_is_javascript(unsigned char *language)
{
	int len = strlen((const char *)language);

	if (len < 10)
		return 0;
	if (len > 10 && !isdigit(language[10]))
		return 0;

	return !c_strncasecmp((const char *)language, "javascript", 10);
}
#endif

/* Handler of the <script> element.
 *
 * @a is handed to html_skip() and is where the type, language and src
 * attributes are read from. @html points at the element contents, @eof at
 * the end of the available data.
 *
 * Without CONFIG_ECMASCRIPT only html_skip() is called. Otherwise:
 *  - a script with an unsupported type or language, or an external script
 *    while ecmascript.enable is off, only bumps html_top->invisible;
 *  - an external script (src attribute, only looked at when the part has
 *    a document) is requested through special_f(SP_SCRIPT) and its URL,
 *    prefixed with '^', is added to the document's onload snippets;
 *  - an inline script has @end set to its closing </script> tag and its
 *    text is added to the document's onload snippets. If no closing tag is
 *    found, @end is set to @eof and nothing is added.
 * @end is left untouched on the first two paths.
 *
 * TODO: <noscript> processing. Well, same considerations apply as to
 * CSS property display: none processing.
 * TODO: Charsets for external scripts. */
void
html_script(struct html_context *html_context, unsigned char *a,
            unsigned char *html, unsigned char *eof, unsigned char **end)
{
#ifdef CONFIG_ECMASCRIPT
	unsigned char *attr;
	unsigned char *scan;
	int commented = 0;
#endif

	html_skip(html_context, a);

#ifdef CONFIG_ECMASCRIPT
	/* Scripts we refuse end up at not_processed, so that a nested
	 * <script> still gets a chance to be looked at. */

	attr = get_attr_val(a, (unsigned char *)"type", html_context->doc_cp);
	if (attr) {
		int accepted = script_type_is_ecmascript(attr);

		mem_free(attr);
		if (!accepted)
			goto not_processed;
	}

	attr = get_attr_val(a, (unsigned char *)"language", html_context->doc_cp);
	if (attr) {
		int accepted = script_language_is_javascript(attr);

		mem_free(attr);
		if (!accepted)
			goto not_processed;
	}

	if (html_context->part->document) {
		attr = get_attr_val(a, (unsigned char *)"src", html_context->doc_cp);
		if (attr) {
			/* External reference. */
			unsigned char *import_url;
			struct uri *uri;

			if (!get_opt_bool((const unsigned char *)"ecmascript.enable", NULL)) {
				mem_free(attr);
				goto not_processed;
			}

			/* HTML <head> urls should already be fine but we
			 * can't detect them. */
			import_url = join_urls(html_context->base_href, attr);
			mem_free(attr);

			if (import_url) {
				uri = get_uri(import_url, URI_BASE);
				if (uri) {
					/* Request the imported script as part
					 * of the document ... */
					html_context->special_f(html_context, SP_SCRIPT, uri);
					done_uri(uri);

					/* ... and leave a '^'-prefixed URL
					 * reference as the onload snippet. */
					insert_in_string(&import_url, 0, (const unsigned char *)"^", 1);
					add_to_string_list(&html_context->part->document->onload_snippets,
					                   import_url, -1);
				}
				if (import_url)
					mem_free(import_url);
			}

			/* Retreat. Nested scripts are not permitted here. */
			return;
		}
	}

	/* An inline script we are going to take. Find its real start. */
	while (html < eof && *html <= ' ')
		html++;

	if (eof - html > 4 && !strncmp((const char *)html, "<!--", 4)) {
		commented = 1;

		/* Skip the rest of the line, unless the comment gets closed
		 * on it already. The latter most likely means a broken
		 * document; whatever follows the --> is then handled as
		 * ordinary script contents (probably there is none). */
		while (*html != '\n' && *html != '\r' && eof - html >= 3) {
			if (!strncmp((const char *)html, "-->", 3)) {
				html += 3;
				commented = 0;
				break;
			}
			html++;
		}
	}

	/* Search for the closing tag. <script> contents is raw CDATA, so no
	 * quote counting is done; however, as long as we are inside of the
	 * wrapping comment, a </script> is tolerated and ignored. See i.e.
	 * Mozilla bug 26857. */
	for (scan = html; scan < eof; scan++) {
		unsigned char *name;
		int namelen;

		if (commented) {
			if (eof - scan >= 3
			    && !strncmp((const char *)scan, "-->", 3)) {
				/* The loop increment steps over the
				 * final '>'. */
				scan += 2;
				commented = 0;
			}
			continue;
		}

		/* We stop right in front of the closing element, which is
		 * why parse_element() is not asked for the element end. */
		if (*scan == '<'
		    && !parse_element(scan, eof, &name, &namelen, NULL, NULL)
		    && !c_strlcasecmp(name, namelen, (const unsigned char *)"/script", 7))
			break;
	}

	if (scan >= eof) {
		/* No closing tag: the document is either still loading or
		 * broken. Give up. */
		*end = eof;
		return;
	}

	*end = scan;

	if (html_context->part->document && *html != '^') {
		add_to_string_list(&html_context->part->document->onload_snippets,
		                   html, scan - html);
	}
	return;

not_processed:
	/* Permit nested scripts and retreat. */
	html_top->invisible++;
#endif
}