/*
 * Return the text of the next tag in mp: what lies between the next '<'
 * and its closing '>' (neither delimiter is stored).
 *
 * The first character after '<' is always stored unchanged.  After that:
 *
 *   Ordinary tags - outside double quotes, a blank (space, \n, \r, \t, \v)
 *   is stored as a single space unless the last stored character already
 *   is one; inside double quotes blanks and '>' are stored as-is; a
 *   backslash is stored together with the character that follows it.
 *
 *   Comments - entered when the tag text begins with "!-" plus one more
 *   character.  Input is copied until a '>' that directly follows two
 *   stored '-' characters.  A whitespace character is skipped only when the
 *   last stored character is a space.
 *
 * Returns NULL when there is no further '<' or the input ends right after
 * it, "" for "<>", and otherwise a static buffer that later calls reuse.
 */
const char *
getTag(memBuf_t *mp)
{
    static char *buf = NULL;
    static size_t bufsize = 0;
    size_t count = 0;
    int quoted = 0;
    int isComment = 0;
    int ch;

    if (memEof(mp)) {
        log(("getTag(): returning NULL\n"));
        return NULL;
    }

    /* discard everything up to and including the opening '<' */
    do {
        ch = memGetc(mp);
    } while (ch != EOF && ch != '<');
    if (ch == EOF) {
        log(("getTag(): returning NULL\n"));
        return NULL;
    }

    /* first character of the tag decides between "<>", EOF and real text */
    ch = memGetc(mp);
    if (ch == EOF) {
        log(("getTag(): returning NULL\n"));
        return NULL;
    }
    if (ch == '>') {
        log(("getTag(): returning empty tag\n"));
        return "";
    }
    addchar(buf, bufsize, count, (char)ch);

    /* look for a comment opener; "<!>" and "<!->" end the tag early */
    if (ch == '!') {
        int second = memGetc(mp);

        if (second == '>' || second == EOF)
            goto finished;
        addchar(buf, bufsize, count, (char)second);

        if (second == '-') {
            int third = memGetc(mp);

            if (third == '>' || third == EOF)
                goto finished;
            addchar(buf, bufsize, count, (char)third);
            isComment = 1;
        }
    }

    if (isComment) {
        /* at least three characters are stored here, so count-2 is valid */
        for (;;) {
            ch = memGetc(mp);
            if (ch == EOF)
                break;
            if (ch == '>' && buf[count-1] == '-' && buf[count-2] == '-')
                goto finished;
            if (!(isspace(ch) && buf[count-1] == ' ')) {
                addchar(buf, bufsize, count, (char)ch);
            }
        }
    } else {
        while ((ch = memGetc(mp)) != EOF) {
            if (ch == '\\') {
                /* keep the backslash and whatever it escapes */
                addchar(buf, bufsize, count, (char)ch);
                ch = memGetc(mp);
                if (ch == EOF)
                    goto finished;
                addchar(buf, bufsize, count, (char)ch);
            } else if (ch == '>') {
                if (!quoted)
                    goto finished;
                addchar(buf, bufsize, count, (char)ch);
            } else if (ch == ' ' || ch == '\n' || ch == '\r' ||
                       ch == '\t' || ch == '\v') {
                if (quoted) {
                    addchar(buf, bufsize, count, (char)ch);
                } else if (count > 0 && buf[count-1] != ' ') {
                    addchar(buf, bufsize, count, ' ');
                }
            } else {
                if (ch == '"')
                    quoted = !quoted;
                addchar(buf, bufsize, count, (char)ch);
            }
        }
    }

    /* input ran out before the tag was closed */
    term(buf, bufsize, count);
    log(("getTag(): returning %s\n", count ? buf : "NULL"));
    return count ? buf : NULL;

finished:
    term(buf, bufsize, count);
    log(("getTag(): returning %s\n", buf));
    return buf;
}
/*
 * Return the byte at the read pointer of mp as an unsigned char value and
 * advance the read pointer; return EOF once memEof(mp) reports true.
 */
int
memGetc(memBuf_t *mp)
{
    const unsigned char *cur;

    if (memEof(mp))
        return EOF;

    /* read through unsigned char so high bytes never come out negative */
    cur = (const unsigned char *)mp->readptr;
    mp->readptr++;
    return (int)*cur;
}
/*
 * Get next non-tag text, eliminating leading and trailing whitespace
 * and leaving only a single space for all internal whitespace.
 *
 * A '<' met before any text has been collected causes the tag to be read
 * and discarded with getTag(); a '<' met after text has been collected is
 * handed back with memUngetc() and ends the text run.
 *
 * Between '&' and ';' the entities amp, lt, gt, quot, nbsp and decimal
 * #N (1..255) are decoded in place.  &nbsp; is treated like literal
 * whitespace.  Anything else is kept verbatim, including the ';'.
 *
 * Returns NULL when the input ends without yielding any text; otherwise a
 * static buffer that later calls reuse.
 */
char *
getNonTag(memBuf_t *mp)
{
    static char *buf = NULL;
    static size_t bufsize = 0;
    size_t count = 0;
    size_t amp = 0;     /* index just past a pending '&', 0 when none */
    int c;

    if (memEof(mp)) {
        log(("getNonTag(): returning NULL\n"));
        return NULL;
    }

    while ((c = memGetc(mp)) != EOF) {
        switch (c) {
        case '<':
            memUngetc(mp);
            if (count) {
                if (buf[count-1] == ' ')
                    --count;
                term(buf, bufsize, count);
                log(("getNonTag(): returning %s\n", buf));
                return buf;
            }
            /* nothing collected yet: skip over the tag and keep scanning */
            (void)getTag(mp);
            break;

        /*
         * NOTE(review): 0x82, 0xC2 and 0xC3 are handled as whitespace, which
         * also swallows the lead byte of two-byte UTF-8 sequences - confirm
         * this is still wanted.
         */
        case ' ': case '\n': case '\r': case '\t': case '\v':
        case 0x82: /* UTF-8 */
        case 0xC2: /* UTF-8 */
        case 0xC3: /* UTF-8 */
        case 0xA0: /* iso-8859-1 nbsp */
            if (count && buf[count-1] != ' ') {
                addchar(buf, bufsize, count, ' ');
            }
            break;

        case ';':
            if (amp > 0) {
                const char *cp;
                int code;

                term(buf, bufsize, count);
                /*
                 * Take the pointer only after term(): it is handed buf and
                 * bufsize, so it may move the buffer.
                 */
                cp = &buf[amp];

                if (*cp == '#') {
                    code = atoi(cp + 1);
                    if (code > 0 && code <= 255) {
                        buf[amp-1] = (char)code;
                        count = amp;
                    } else {
                        /*
                         * 0 would embed a NUL and cut the result short, and
                         * larger values do not fit in one byte: keep the
                         * entity text as written.
                         */
                        addchar(buf, bufsize, count, (char)c);
                    }
                } else if (!strcmp(cp, "amp")) {
                    count = amp;        /* the '&' is already in place */
                } else if (!strcmp(cp, "gt")) {
                    buf[amp-1] = '>';
                    count = amp;
                } else if (!strcmp(cp, "lt")) {
                    buf[amp-1] = '<';
                    count = amp;
                } else if (!strcmp(cp, "nbsp")) {
                    /* same rule as literal whitespace: never leading, never doubled */
                    if (amp > 1 && buf[amp-2] != ' ') {
                        buf[amp-1] = ' ';
                        count = amp;
                    } else {
                        count = amp - 1;
                    }
                } else if (!strcmp(cp, "quot")) {
                    buf[amp-1] = '"';
                    count = amp;
                } else {
                    addchar(buf, bufsize, count, (char)c);
                }
                amp = 0;
            } else {
                addchar(buf, bufsize, count, (char)c);
            }
            break;

        case '&':
            amp = count + 1;
            /* fall through */
        default:
            addchar(buf, bufsize, count, (char)c);
        }
    }

    if (count && buf[count-1] == ' ')
        --count;
    term(buf, bufsize, count);
    log(("getNonTag(): returning %s\n", count ? buf : "NULL"));
    return count ? buf : NULL;
} /* getNonTag() */