/**
 * Tell whether `str` is spelled like an identifier.
 *
 * Returns true only when the string is non-empty, its first character
 * satisfies isIdStart() and every following character satisfies isIdChar().
 */
bool String::isIdentifier (String str)
{
    // Reject the empty string first so that buf[0] is never examined for it.
    if (_len (str.buf) == 0 || !isIdStart (str.buf[0]))
        return false;

    // Every character after the first must be a valid identifier character.
    int pos = 1;
    while (pos < _len (str.buf))
    {
        if (!isIdChar (str.buf[pos]))
            return false;
        ++pos;
    }

    return true;
}
void Macro::expand(OutBuffer *buf, size_t start, size_t *pend, utf8_t *arg, size_t arglen) { #if 0 printf("Macro::expand(buf[%d..%d], arg = '%.*s')\n", start, *pend, arglen, arg); printf("Buf is: '%.*s'\n", *pend - start, buf->data + start); #endif static int nest; if (nest > 100) // limit recursive expansion return; nest++; size_t end = *pend; assert(start <= end); assert(end <= buf->offset); /* First pass - replace $0 */ arg = memdup(arg, arglen); for (size_t u = start; u + 1 < end; ) { utf8_t *p = buf->data; // buf->data is not loop invariant /* Look for $0, but not $$0, and replace it with arg. */ if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+')) { if (u > start && p[u - 1] == '$') { // Don't expand $$0, but replace it with $0 buf->remove(u - 1, 1); end--; u += 1; // now u is one past the closing '1' continue; } utf8_t c = p[u + 1]; int n = (c == '+') ? -1 : c - '0'; utf8_t *marg; size_t marglen; extractArgN(arg, arglen, &marg, &marglen, n); if (marglen == 0) { // Just remove macro invocation //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg); buf->remove(u, 2); end -= 2; } else if (c == '+') { // Replace '$+' with 'arg' //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg); buf->remove(u, 2); buf->insert(u, marg, marglen); end += marglen - 2; // Scan replaced text for further expansion size_t mend = u + marglen; expand(buf, u, &mend, NULL, 0); end += mend - (u + marglen); u = mend; } else { // Replace '$1' with '\xFF{arg\xFF}' //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], marglen, marg); buf->data[u] = 0xFF; buf->data[u + 1] = '{'; buf->insert(u + 2, marg, marglen); buf->insert(u + 2 + marglen, "\xFF}", 2); end += -2 + 2 + marglen + 2; // Scan replaced text for further expansion size_t mend = u + 2 + marglen; expand(buf, u + 2, &mend, NULL, 0); end += mend - (u + 2 + marglen); u = mend; } //printf("u = %d, end = %d\n", u, end); //printf("#%.*s#\n", end, &buf->data[0]); continue; } u++; } /* Second pass - 
replace other macros */ for (size_t u = start; u + 4 < end; ) { utf8_t *p = buf->data; // buf->data is not loop invariant /* A valid start of macro expansion is $(c, where c is * an id start character, and not $$(c. */ if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p+u+2)) { //printf("\tfound macro start '%c'\n", p[u + 2]); utf8_t *name = p + u + 2; size_t namelen = 0; utf8_t *marg; size_t marglen; size_t v; /* Scan forward to find end of macro name and * beginning of macro argument (marg). */ for (v = u + 2; v < end; v+=utfStride(p+v)) { utf8_t c = p[v]; if (!isIdTail(p+v)) { // We've gone past the end of the macro name. namelen = v - (u + 2); break; } } v += extractArgN(p + v, end - v, &marg, &marglen, 0); assert(v <= end); if (v < end) { // v is on the closing ')' if (u > start && p[u - 1] == '$') { // Don't expand $$(NAME), but replace it with $(NAME) buf->remove(u - 1, 1); end--; u = v; // now u is one past the closing ')' continue; } Macro *m = search(name, namelen); if (m) { #if 0 if (m->textlen && m->text[0] == ' ') { m->text++; m->textlen--; } #endif if (m->inuse && marglen == 0) { // Remove macro invocation buf->remove(u, v + 1 - u); end -= v + 1 - u; } else if (m->inuse && arglen == marglen && memcmp(arg, marg, arglen) == 0) { // Recursive expansion; just leave in place } else { //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", m->namelen, m->name, marglen, marg, m->textlen, m->text); #if 1 marg = memdup(marg, marglen); // Insert replacement text buf->spread(v + 1, 2 + m->textlen + 2); buf->data[v + 1] = 0xFF; buf->data[v + 2] = '{'; memcpy(buf->data + v + 3, m->text, m->textlen); buf->data[v + 3 + m->textlen] = 0xFF; buf->data[v + 3 + m->textlen + 1] = '}'; end += 2 + m->textlen + 2; // Scan replaced text for further expansion m->inuse++; size_t mend = v + 1 + 2+m->textlen+2; expand(buf, v + 1, &mend, marg, marglen); end += mend - (v + 1 + 2+m->textlen+2); m->inuse--; buf->remove(u, v + 1 - u); end -= v + 1 - u; u += mend - (v + 1); #else // Insert 
replacement text buf->insert(v + 1, m->text, m->textlen); end += m->textlen; // Scan replaced text for further expansion m->inuse++; size_t mend = v + 1 + m->textlen; expand(buf, v + 1, &mend, marg, marglen); end += mend - (v + 1 + m->textlen); m->inuse--; buf->remove(u, v + 1 - u); end -= v + 1 - u; u += mend - (v + 1); #endif mem.free(marg); //printf("u = %d, end = %d\n", u, end); //printf("#%.*s#\n", end - u, &buf->data[u]); continue; } } else { // Replace $(NAME) with nothing buf->remove(u, v + 1 - u); end -= (v + 1 - u); continue; } } } u++; } mem.free(arg); *pend = end; nest--; }
void Macro::expand(OutBuffer *buf, size_t start, size_t *pend, const utf8_t *arg, size_t arglen) { #if 0 printf("Macro::expand(buf[%d..%d], arg = '%.*s')\n", start, *pend, arglen, arg); printf("Buf is: '%.*s'\n", *pend - start, buf->data + start); #endif static int nest; if (nest > 100) // limit recursive expansion return; nest++; size_t end = *pend; assert(start <= end); assert(end <= buf->offset); /* First pass - replace $0 */ arg = memdup(arg, arglen); for (size_t u = start; u + 1 < end; ) { utf8_t *p = (utf8_t *)buf->data; // buf->data is not loop invariant /* Look for $0, but not $$0, and replace it with arg. */ if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+')) { if (u > start && p[u - 1] == '$') { // Don't expand $$0, but replace it with $0 buf->remove(u - 1, 1); end--; u += 1; // now u is one past the closing '1' continue; } utf8_t c = p[u + 1]; int n = (c == '+') ? -1 : c - '0'; const utf8_t *marg; size_t marglen; if (n == 0) { marg = arg; marglen = arglen; } else extractArgN(arg, arglen, &marg, &marglen, n); if (marglen == 0) { // Just remove macro invocation //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg); buf->remove(u, 2); end -= 2; } else if (c == '+') { // Replace '$+' with 'arg' //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg); buf->remove(u, 2); buf->insert(u, marg, marglen); end += marglen - 2; // Scan replaced text for further expansion size_t mend = u + marglen; expand(buf, u, &mend, NULL, 0); end += mend - (u + marglen); u = mend; } else { // Replace '$1' with '\xFF{arg\xFF}' //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], marglen, marg); buf->data[u] = 0xFF; buf->data[u + 1] = '{'; buf->insert(u + 2, marg, marglen); buf->insert(u + 2 + marglen, (const char *)"\xFF}", 2); end += -2 + 2 + marglen + 2; // Scan replaced text for further expansion size_t mend = u + 2 + marglen; expand(buf, u + 2, &mend, NULL, 0); end += mend - (u + 2 + marglen); u = mend; } //printf("u = %d, end = %d\n", u, 
end); //printf("#%.*s#\n", end, &buf->data[0]); continue; } u++; } /* Second pass - replace other macros */ for (size_t u = start; u + 4 < end; ) { utf8_t *p = (utf8_t *)buf->data; // buf->data is not loop invariant /* A valid start of macro expansion is $(c, where c is * an id start character, and not $$(c. */ if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p+u+2)) { //printf("\tfound macro start '%c'\n", p[u + 2]); utf8_t *name = p + u + 2; size_t namelen = 0; const utf8_t *marg; size_t marglen; size_t v; /* Scan forward to find end of macro name and * beginning of macro argument (marg). */ for (v = u + 2; v < end; v+=utfStride(p+v)) { if (!isIdTail(p+v)) { // We've gone past the end of the macro name. namelen = v - (u + 2); break; } } v += extractArgN(p + v, end - v, &marg, &marglen, 0); assert(v <= end); if (v < end) { // v is on the closing ')' if (u > start && p[u - 1] == '$') { // Don't expand $$(NAME), but replace it with $(NAME) buf->remove(u - 1, 1); end--; u = v; // now u is one past the closing ')' continue; } Macro *m = search(name, namelen); if (!m) { static const char undef[] = "DDOC_UNDEFINED_MACRO"; m = search((const utf8_t *)undef, strlen(undef)); if (m) { // Macro was not defined, so this is an expansion of // DDOC_UNDEFINED_MACRO. Prepend macro name to args. 
// marg = name[ ] ~ "," ~ marg[ ]; if (marglen) { utf8_t *q = (utf8_t *)mem.xmalloc(namelen + 1 + marglen); assert(q); memcpy(q, name, namelen); q[namelen] = ','; memcpy(q + namelen + 1, marg, marglen); marg = q; marglen += namelen + 1; } else { marg = name; marglen = namelen; } } } if (m) { if (m->inuse && marglen == 0) { // Remove macro invocation buf->remove(u, v + 1 - u); end -= v + 1 - u; } else if (m->inuse && ((arglen == marglen && memcmp(arg, marg, arglen) == 0) || (arglen + 4 == marglen && marg[0] == 0xFF && marg[1] == '{' && memcmp(arg, marg + 2, arglen) == 0 && marg[marglen - 2] == 0xFF && marg[marglen - 1] == '}' ) ) ) { /* Recursive expansion: * marg is same as arg (with blue paint added) * Just leave in place. */ } else { //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", m->namelen, m->name, marglen, marg, m->textlen, m->text); marg = memdup(marg, marglen); // Insert replacement text buf->spread(v + 1, 2 + m->textlen + 2); buf->data[v + 1] = 0xFF; buf->data[v + 2] = '{'; memcpy(buf->data + v + 3, m->text, m->textlen); buf->data[v + 3 + m->textlen] = 0xFF; buf->data[v + 3 + m->textlen + 1] = '}'; end += 2 + m->textlen + 2; // Scan replaced text for further expansion m->inuse++; size_t mend = v + 1 + 2+m->textlen+2; expand(buf, v + 1, &mend, marg, marglen); end += mend - (v + 1 + 2+m->textlen+2); m->inuse--; buf->remove(u, v + 1 - u); end -= v + 1 - u; u += mend - (v + 1); mem.xfree((utf8_t *)marg); //printf("u = %d, end = %d\n", u, end); //printf("#%.*s#\n", end - u, &buf->data[u]); continue; } } else { // Replace $(NAME) with nothing buf->remove(u, v + 1 - u); end -= (v + 1 - u); continue; } } } u++; } mem.xfree((utf8_t *)arg); *pend = end; nest--; }