const char *utf_validateString(unsigned char *s, size_t len) { size_t idx; const char *err = NULL; dchar_t dc; for (idx = 0; idx < len; ) { err = utf_decodeChar(s, len, &idx, &dc); if (err) break; } return err; }
void PragmaDeclaration::semantic(Scope *sc) { // Should be merged with PragmaStatement #if IN_LLVM Pragma llvm_internal = LLVMnone; std::string arg1str; #endif //printf("\tPragmaDeclaration::semantic '%s'\n",toChars()); if (ident == Id::msg) { if (args) { for (size_t i = 0; i < args->dim; i++) { Expression *e = (*args)[i]; sc = sc->startCTFE(); e = e->semantic(sc); e = resolveProperties(sc, e); sc = sc->endCTFE(); // pragma(msg) is allowed to contain types as well as expressions e = ctfeInterpretForPragmaMsg(e); if (e->op == TOKerror) { errorSupplemental(loc, "while evaluating pragma(msg, %s)", (*args)[i]->toChars()); return; } StringExp *se = e->toString(); if (se) { se = se->toUTF8(sc); fprintf(stderr, "%.*s", (int)se->len, (char *)se->string); } else fprintf(stderr, "%s", e->toChars()); } fprintf(stderr, "\n"); } goto Lnodecl; } else if (ident == Id::lib) { if (!args || args->dim != 1) error("string expected for library name"); else { Expression *e = (*args)[0]; sc = sc->startCTFE(); e = e->semantic(sc); e = resolveProperties(sc, e); sc = sc->endCTFE(); e = e->ctfeInterpret(); (*args)[0] = e; if (e->op == TOKerror) goto Lnodecl; StringExp *se = e->toString(); if (!se) error("string expected for library name, not '%s'", e->toChars()); else { char *name = (char *)mem.malloc(se->len + 1); memcpy(name, se->string, se->len); name[se->len] = 0; if (global.params.verbose) fprintf(global.stdmsg, "library %s\n", name); if (global.params.moduleDeps && !global.params.moduleDepsFile) { OutBuffer *ob = global.params.moduleDeps; Module *imod = sc->instantiatingModule(); ob->writestring("depsLib "); ob->writestring(imod->toPrettyChars()); ob->writestring(" ("); escapePath(ob, imod->srcfile->toChars()); ob->writestring(") : "); ob->writestring((char *) name); ob->writenl(); } mem.free(name); } } goto Lnodecl; } else if (ident == Id::startaddress) { if (!args || args->dim != 1) error("function name expected for start address"); else { /* Bugzilla 11980: * resolveProperties and 
ctfeInterpret call are not necessary. */ Expression *e = (*args)[0]; sc = sc->startCTFE(); e = e->semantic(sc); sc = sc->endCTFE(); (*args)[0] = e; Dsymbol *sa = getDsymbol(e); if (!sa || !sa->isFuncDeclaration()) error("function name expected for start address, not '%s'", e->toChars()); } goto Lnodecl; } else if (ident == Id::mangle) { if (!args || args->dim != 1) error("string expected for mangled name"); else { Expression *e = (*args)[0]; e = e->semantic(sc); e = e->ctfeInterpret(); (*args)[0] = e; if (e->op == TOKerror) goto Lnodecl; StringExp *se = e->toString(); if (!se) { error("string expected for mangled name, not '%s'", e->toChars()); return; } if (!se->len) error("zero-length string not allowed for mangled name"); if (se->sz != 1) error("mangled name characters can only be of type char"); #if 1 /* Note: D language specification should not have any assumption about backend * implementation. Ideally pragma(mangle) can accept a string of any content. * * Therefore, this validation is compiler implementation specific. */ for (size_t i = 0; i < se->len; ) { utf8_t *p = (utf8_t *)se->string; dchar_t c = p[i]; if (c < 0x80) { if (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9' || c != 0 && strchr("$%().:?@[]_", c)) { ++i; continue; } else { error("char 0x%02x not allowed in mangled name", c); break; } } if (const char* msg = utf_decodeChar((utf8_t *)se->string, se->len, &i, &c)) { error("%s", msg); break; } if (!isUniAlpha(c)) { error("char 0x%04x not allowed in mangled name", c); break; } } #endif } } #if IN_LLVM else if ((llvm_internal = DtoGetPragma(sc, this, arg1str)) != LLVMnone) { // nothing to do anymore } #endif else if (global.params.ignoreUnsupportedPragmas) { if (global.params.verbose) { /* Print unrecognized pragmas */ fprintf(global.stdmsg, "pragma %s", ident->toChars()); if (args) { for (size_t i = 0; i < args->dim; i++) { Expression *e = (*args)[i]; #if IN_LLVM // ignore errors in ignored pragmas. 
global.gag++; unsigned errors_save = global.errors; #endif sc = sc->startCTFE(); e = e->semantic(sc); e = resolveProperties(sc, e); sc = sc->endCTFE(); e = e->ctfeInterpret(); if (i == 0) fprintf(global.stdmsg, " ("); else fprintf(global.stdmsg, ","); fprintf(global.stdmsg, "%s", e->toChars()); #if IN_LLVM // restore error state. global.gag--; global.errors = errors_save; #endif } if (args->dim) fprintf(global.stdmsg, ")"); } fprintf(global.stdmsg, "\n"); } } else error("unrecognized pragma(%s)", ident->toChars()); Ldecl: if (decl) { for (size_t i = 0; i < decl->dim; i++) { Dsymbol *s = (*decl)[i]; s->semantic(sc); if (ident == Id::mangle) { StringExp *e = (*args)[0]->toString(); char *name = (char *)mem.malloc(e->len + 1); memcpy(name, e->string, e->len); name[e->len] = 0; unsigned cnt = setMangleOverride(s, name); if (cnt > 1) error("can only apply to a single declaration"); } #if IN_LLVM else { DtoCheckPragma(this, s, llvm_internal, arg1str); } #endif } } return; Lnodecl: if (decl) { error("pragma is missing closing ';'"); goto Ldecl; // do them anyway, to avoid segfaults. } }
/**
 * Cast this string literal to type `t`.
 *
 * Outcomes, in the order they are tried:
 *  - an uncommitted literal cast to `void*` is an error (ErrorExp returned);
 *  - if `t` is already the literal's type, the (possibly copied) literal is
 *    returned unchanged;
 *  - a cast to a delegate from a non-delegate is deferred to
 *    Expression::castTo();
 *  - if the base types match, or the element sizes match, a copy is simply
 *    retyped (static arrays additionally get their length adjusted at L2);
 *  - if either side is not a static array / dynamic array / pointer, or the
 *    literal is already committed, the literal is wrapped in a CastExp;
 *  - otherwise the character data is re-encoded between char, wchar and
 *    dchar, then the length is adjusted for static-array targets.
 *
 * Params:
 *      sc = scope, forwarded to Expression::castTo() on the delegate path
 *      t  = target type
 * Returns:
 *      the resulting expression; `this` itself is never modified.
 */
Expression *StringExp::castTo(Scope *sc, Type *t)
{
    /* This follows copy-on-write; any changes to 'this'
     * will result in a copy.
     * The this->string member is considered immutable.
     */

    //printf("StringExp::castTo(t = %s), '%s' committed = %d\n", t->toChars(), toChars(), committed);

    if (!committed && t->ty == Tpointer && t->nextOf()->ty == Tvoid)
    {
        error("cannot convert string literal to void*");
        return new ErrorExp();
    }

    StringExp *result = this;
    bool ownsCopy = false;      // true once `result` no longer aliases `this`
    if (!committed)
    {
        result = (StringExp *)copy();
        result->committed = 1;
        ownsCopy = true;
    }

    if (type == t)
        return result;

    Type *toBase = t->toBasetype();
    //printf("\ttype = %s\n", type->toChars());
    if (toBase->ty == Tdelegate && type->toBasetype()->ty != Tdelegate)
        return Expression::castTo(sc, t);

    Type *fromBase = type->toBasetype();
    if (fromBase == toBase)
    {
        // Same underlying type: only the nominal type changes.
        if (!ownsCopy)
        {
            result = (StringExp *)copy();
            ownsCopy = true;
        }
        result->type = t;
        return result;
    }

    const bool toIsArrayLike =
        toBase->ty == Tsarray || toBase->ty == Tarray || toBase->ty == Tpointer;
    const bool fromIsArrayLike =
        fromBase->ty == Tsarray || fromBase->ty == Tarray || fromBase->ty == Tpointer;
    if (!toIsArrayLike || !fromIsArrayLike)
    {
        if (!ownsCopy)
        {
            result = (StringExp *)copy();
            ownsCopy = true;
        }
        goto Lcast;
    }

    if (fromBase->nextOf()->size() == toBase->nextOf()->size())
    {
        // Element width is unchanged, so the data can be reused as is.
        if (!ownsCopy)
        {
            result = (StringExp *)copy();
            ownsCopy = true;
        }
        if (toBase->ty == Tsarray)
            goto L2;    // handle possible change in static array dimension
        result->type = t;
        return result;
    }

    // Element widths differ; a committed literal is not re-encoded.
    if (committed)
        goto Lcast;

#define X(tf,tt)        ((tf) * 256 + (tt))
    {
        OutBuffer buffer;
        size_t newlen = 0;
        int fromElem = fromBase->nextOf()->toBasetype()->ty;
        int toElem = toBase->nextOf()->toBasetype()->ty;
        switch (X(fromElem, toElem))
        {
            case X(Tchar, Tchar):
            case X(Twchar,Twchar):
            case X(Tdchar,Tdchar):
                break;

            case X(Tchar, Twchar):
            {
                size_t u = 0;
                while (u < len)
                {
                    unsigned c;
                    const char *msg = utf_decodeChar((unsigned char *)result->string, len, &u, &c);
                    if (msg)
                        error("%s", msg);
                    else
                        buffer.writeUTF16(c);
                }
                newlen = buffer.offset / 2;
                buffer.writeUTF16(0);
                goto L1;
            }

            case X(Tchar, Tdchar):
            {
                size_t u = 0;
                while (u < len)
                {
                    unsigned c;
                    const char *msg = utf_decodeChar((unsigned char *)result->string, len, &u, &c);
                    if (msg)
                        error("%s", msg);
                    buffer.write4(c);
                    newlen++;
                }
                buffer.write4(0);
                goto L1;
            }

            case X(Twchar,Tchar):
            {
                size_t u = 0;
                while (u < len)
                {
                    unsigned c;
                    const char *msg = utf_decodeWchar((unsigned short *)result->string, len, &u, &c);
                    if (msg)
                        error("%s", msg);
                    else
                        buffer.writeUTF8(c);
                }
                newlen = buffer.offset;
                buffer.writeUTF8(0);
                goto L1;
            }

            case X(Twchar,Tdchar):
            {
                size_t u = 0;
                while (u < len)
                {
                    unsigned c;
                    const char *msg = utf_decodeWchar((unsigned short *)result->string, len, &u, &c);
                    if (msg)
                        error("%s", msg);
                    buffer.write4(c);
                    newlen++;
                }
                buffer.write4(0);
                goto L1;
            }

            case X(Tdchar,Tchar):
            {
                const unsigned *src = (unsigned *)result->string;
                for (size_t u = 0; u < len; u++)
                {
                    unsigned c = src[u];
                    if (!utf_isValidDchar(c))
                        error("invalid UCS-32 char \\U%08x", c);
                    else
                        buffer.writeUTF8(c);
                }
                // Length is taken from the bytes actually emitted.
                newlen = buffer.offset;
                buffer.writeUTF8(0);
                goto L1;
            }

            case X(Tdchar,Twchar):
            {
                const unsigned *src = (unsigned *)result->string;
                for (size_t u = 0; u < len; u++)
                {
                    unsigned c = src[u];
                    if (!utf_isValidDchar(c))
                        error("invalid UCS-32 char \\U%08x", c);
                    else
                        buffer.writeUTF16(c);
                }
                // Length is taken from the 16-bit units actually emitted.
                newlen = buffer.offset / 2;
                buffer.writeUTF16(0);
                goto L1;
            }

            L1:
                if (!ownsCopy)
                {
                    result = (StringExp *)copy();
                    ownsCopy = true;
                }
                result->string = buffer.extractData();
                result->len = newlen;
                result->sz = toBase->nextOf()->size();
                break;

            default:
                assert(fromBase->nextOf()->size() != toBase->nextOf()->size());
                goto Lcast;
        }
    }
#undef X

L2:
    assert(ownsCopy);

    // See if need to truncate or extend the literal
    if (toBase->ty == Tsarray)
    {
        int dim2 = ((TypeSArray *)toBase)->dim->toInteger();

        //printf("dim from = %d, to = %d\n", se->len, dim2);

        // Changing dimensions
        if (dim2 != result->len)
        {
            // Copy when changing the string literal
            unsigned newsz = result->sz;

            // Number of elements carried over from the old data.
            int d;
            if (dim2 < result->len)
                d = dim2;
            else
                d = result->len;

            void *s = (unsigned char *)mem.malloc((dim2 + 1) * newsz);
            memcpy(s, result->string, d * newsz);
            // Extend with 0, add terminating 0
            memset((char *)s + d * newsz, 0, (dim2 + 1 - d) * newsz);
            result->string = s;
            result->len = dim2;
        }
    }
    result->type = t;
    return result;

Lcast:
    Expression *e = new CastExp(loc, result, t);
    e->type = t;        // so semantic() won't be run on e
    return e;
}