/*
 * Get next tag text: the characters between the next '<' and its
 * closing '>'.
 *
 * Ordinary tags: outside double-quoted strings every run of whitespace is
 * collapsed to a single space, and a space left by that collapsing directly
 * in front of the closing '>' is dropped.  Inside double-quoted strings
 * whitespace and '>' are copied verbatim.  A backslash and the character
 * following it are always copied verbatim.  The first character after '<'
 * is always stored as-is.
 *
 * Comments ("<!--" ... "-->"): characters are copied verbatim, except that
 * a whitespace character directly following a literal space already in the
 * buffer is skipped.
 *
 * Returns:
 *   NULL  if the input is exhausted before a '<' is found, or directly
 *         after it;
 *   ""    for the empty tag "<>";
 *   otherwise a pointer to a static buffer holding the tag text (without
 *   the enclosing '<' and '>').  The buffer is reused by the next call.
 */
const char *
getTag(memBuf_t *mp)
{
    static char *buf = NULL;
    static size_t bufsize = 0;
    size_t count = 0;
    size_t escEnd = 0;      /* value of count right after an escaped char */
    int inStr = 0, comment = 0, c;

    if (memEof(mp)) {
        log(("getTag(): returning NULL\n"));
        return NULL;
    }

    /* skip everything up to and including the next '<' */
    while ((c = memGetc(mp)) != EOF && c != '<')
        ;
    if (c == EOF) {
        log(("getTag(): returning NULL\n"));
        return NULL;
    }

    /* first char - check for comment */
    c = memGetc(mp);
    if (c == '>') {
        log(("getTag(): returning empty tag\n"));
        return "";
    } else if (c == EOF) {
        log(("getTag(): returning NULL\n"));
        return NULL;
    }
    addchar(buf, bufsize, count, (char)c);

    /* "<!--" opens a comment; "<!>" and "<!->" are returned as they are */
    if (c == '!') {
        int c2 = memGetc(mp);

        if (c2 == '>' || c2 == EOF) {
            term(buf, bufsize, count);
            log(("getTag(): returning %s\n", buf));
            return buf;
        }
        addchar(buf, bufsize, count, (char)c2);
        if (c2 == '-') {
            int c3 = memGetc(mp);

            if (c3 == '>' || c3 == EOF) {
                term(buf, bufsize, count);
                log(("getTag(): returning %s\n", buf));
                return buf;
            }
            addchar(buf, bufsize, count, (char)c3);
            comment = 1;
        }
    }

    if (comment) {
        /* count is at least 3 here ("!--"), so count-2 is a valid index */
        while ((c = memGetc(mp)) != EOF) {
            /* only "-->" terminates a comment */
            if (c == '>' && buf[count-1] == '-' && buf[count-2] == '-') {
                term(buf, bufsize, count);
                log(("getTag(): returning %s\n", buf));
                return buf;
            }
            if (isspace(c) && buf[count-1] == ' ')
                continue;
            addchar(buf, bufsize, count, (char)c);
        }
    } else {
        while ((c = memGetc(mp)) != EOF) {
            switch (c) {
            case '\\':
                /* copy the backslash and the escaped character verbatim */
                addchar(buf, bufsize, count, (char)c);
                c = memGetc(mp);
                if (c == EOF) {
                    term(buf, bufsize, count);
                    log(("getTag(): returning %s\n", buf));
                    return buf;
                }
                addchar(buf, bufsize, count, (char)c);
                escEnd = count;
                break;
            case '>':
                if (inStr)
                    addchar(buf, bufsize, count, (char)c);
                else {
                    /*
                     * Drop the space that whitespace collapsing left in
                     * front of '>'.  An escaped space (count == escEnd) and
                     * the very first character are kept.
                     */
                    if (count > 1 && count != escEnd && buf[count-1] == ' ')
                        --count;
                    term(buf, bufsize, count);
                    log(("getTag(): returning %s\n", buf));
                    return buf;
                }
                break;
            case ' ':
            case '\n':
            case '\r':
            case '\t':
            case '\v':
                if (inStr)
                    addchar(buf, bufsize, count, (char)c);
                else if (count > 0 && buf[count-1] != ' ')
                    addchar(buf, bufsize, count, ' ');
                break;
            case '"':
                inStr = !inStr;
                /* fall through */
            default:
                addchar(buf, bufsize, count, (char)c);
            }
        }
    }

    /* input ended before the tag was closed: return what was collected */
    term(buf, bufsize, count);
    log(("getTag(): returning %s\n", count ? buf : "NULL"));
    return count ? buf : NULL;
}
/*
 * Get META refresh URL (if any).
 *
 * Repeatedly searches mp for "<meta" via memStr(), copies the characters
 * read up to the next '>' (or EOF) into a static buffer, and checks whether
 * that text contains
 *     http-equiv="Refresh" ... content="<delay>; url = <target>"
 * ("Refresh" and "url" are matched case-insensitively, the attribute names
 * case-sensitively).  The search stops at the first tag that yields a URL.
 *
 * memReset(mp) is called before returning, whether or not a URL was found.
 *
 * Returns a pointer to the URL inside the static buffer (valid until the
 * next call), or NULL if no usable refresh tag was found.
 */
char *
memGetMetaRefresh(memBuf_t *mp)
{
    static char *buf = NULL;
    static size_t bufsize = 0;
    char *cp;
    char *metaRefresh = NULL;

    if (!buf) {
        bufsize = 1024;
        buf = myMalloc(bufsize);
    }

    /* look for all "meta" tags until Refresh found */
    while (!metaRefresh && (cp = memStr(mp, "<meta")) != NULL) {
        size_t len = 0;
        char *url;
        int c;

        /*
         * Copy whole tag to buffer for processing.  An index is used rather
         * than a write pointer because myRealloc() may move the buffer,
         * which would leave a pointer into the old block dangling.
         */
        for (c = memGetc(mp); c != EOF && c != '>'; c = memGetc(mp)) {
            buf[len++] = (char)c;
            if (len >= bufsize) {
                bufsize += 1024;
                buf = myRealloc(buf, bufsize);
            }
        }
        /* terminate string (len < bufsize always holds here) */
        buf[len] = '\0';
        log(("found META tag: %s", buf));

        cp = strstr(buf, "http-equiv=");
        if (!cp) {
            log(("no http-equiv, looking for next"));
            continue;
        }
        cp += 11;
        if (strncasecmp(cp, "\"Refresh\"", 9)) {
            log(("no Refresh, looking for next"));
            continue;
        }

        cp = strstr(buf, "content=\"");
        if (!cp) {
            log(("no content, looking for next"));
            continue;
        }
        cp += 9;

        /* skip delay value (everything until ';') */
        while (*cp && *cp != ';')
            cp++;
        /* if not end of string skip ';' */
        if (*cp)
            cp++;
        /* and skip whitespace */
        while (*cp && isspace((unsigned char)*cp))
            cp++;

        /* now there should be "url=" with optional whitespace around '=' */
        if (strncasecmp(cp, "url", 3)) {
            log(("no url key, looking for next"));
            continue;
        }
        cp += 3;
        while (*cp && isspace((unsigned char)*cp))
            cp++;
        if (*cp != '=') {
            log(("no = after url, looking for next"));
            continue;
        }
        cp++;
        while (*cp && isspace((unsigned char)*cp))
            cp++;

        /* this is the beginning of the redirection URL */
        url = cp;
        cp = strchr(url, '"');
        if (!cp) {
            log(("no closing \", looking for next"));
            continue;
        }
        /* cut off terminating '"' and other trailing garbage */
        *cp = '\0';
        metaRefresh = url;
    }

    if (metaRefresh)
        log(("found redirection"));
    else
        log(("no redirection found"));

    memReset(mp);
    return metaRefresh;
}
/*
 * Get next non-tag text, eliminating leading and trailing whitespace
 * and leaving only a single space for all internal whitespace.
 *
 * Tags encountered before any text has been collected are consumed with
 * getTag() and skipped; a '<' encountered after text has been collected is
 * pushed back with memUngetc() and ends the text.
 *
 * Besides ASCII whitespace, the bytes 0x82, 0xC2, 0xC3 and 0xA0 are treated
 * as whitespace.
 *
 * Character references terminated by ';' are decoded in place:
 *   &amp; &gt; &lt; &quot;   -> & > < "
 *   &nbsp;                   -> treated like whitespace
 *   &#NNN; / &#xHH;          -> the byte with that value, if it is 1..255
 * Anything else following '&' is kept literally.
 *
 * Returns a pointer to a static buffer (reused by the next call), or NULL
 * if the input is exhausted without any text having been collected.
 */
char *
getNonTag(memBuf_t *mp)
{
    static char *buf = NULL;
    static size_t bufsize = 0;
    size_t count = 0;
    size_t amp = 0;     /* index just past the pending '&'; 0 if none */
    int c;

    if (memEof(mp)) {
        log(("getNonTag(): returning NULL\n"));
        return NULL;
    }

    while ((c = memGetc(mp)) != EOF) {
        switch (c) {
        case '<':
            memUngetc(mp);
            if (count) {
                if (buf[count-1] == ' ')
                    --count;
                term(buf, bufsize, count);
                log(("getNonTag(): returning %s\n", buf));
                return buf;
            } else
                (void)getTag(mp);
            break;
        case ' ':
        case '\n':
        case '\r':
        case '\t':
        case '\v':
        case 0x82: /* UTF-8 */
        case 0xC2: /* UTF-8 */
        case 0xC3: /* UTF-8 */
        case 0xA0: /* iso-8859-1 nbsp */
            if (count && buf[count-1] != ' ')
                addchar(buf, bufsize, count, ' ');
            break;
        case ';':
            if (amp > 0) {
                char *cp;
                int decoded = 1;

                /*
                 * Terminate first and only then take the address of the
                 * entity name, in case term() has to grow (and thereby
                 * move) the buffer.
                 */
                term(buf, bufsize, count);
                cp = &buf[amp];

                if (*cp == '#') {
                    /* numeric reference, decimal or hexadecimal */
                    const char *digits = cp + 1;
                    char *end;
                    long code;
                    int base = 10;

                    if (*digits == 'x' || *digits == 'X') {
                        ++digits;
                        base = 16;
                    }
                    code = strtol(digits, &end, base);
                    /*
                     * Only values that fit in one non-NUL byte can be
                     * stored; a NUL would cut the returned string short.
                     */
                    if (end != digits && *end == '\0' &&
                        code > 0 && code <= 255) {
                        buf[amp-1] = (char)code;
                        count = amp;
                    } else
                        decoded = 0;
                } else if (!strcmp(cp, "amp")) {
                    /* the '&' is already in place at buf[amp-1] */
                    count = amp;
                } else if (!strcmp(cp, "gt")) {
                    buf[amp-1] = '>';
                    count = amp;
                } else if (!strcmp(cp, "lt")) {
                    buf[amp-1] = '<';
                    count = amp;
                } else if (!strcmp(cp, "nbsp")) {
                    /*
                     * Same rule as for the whitespace bytes above: drop
                     * the reference, then emit one space unless the text is
                     * still empty or already ends in a space.  The slot of
                     * the former '&' is reused, so no growth is needed.
                     */
                    count = amp - 1;
                    if (count && buf[count-1] != ' ')
                        buf[count++] = ' ';
                } else if (!strcmp(cp, "quot")) {
                    buf[amp-1] = '"';
                    count = amp;
                } else
                    decoded = 0;

                /* unknown or undecodable reference: keep it literally */
                if (!decoded)
                    addchar(buf, bufsize, count, (char)c);
                amp = 0;
            } else
                addchar(buf, bufsize, count, (char)c);
            break;
        case '&':
            /* remember where the entity name will start */
            amp = count + 1;
            /* fall through */
        default:
            addchar(buf, bufsize, count, (char)c);
        }
    }

    if (count && buf[count-1] == ' ')
        --count;
    term(buf, bufsize, count);
    log(("getNonTag(): returning %s\n", count ? buf : "NULL"));
    return count ? buf : NULL;
} /* getNonTag() */