void Module::parse() { //printf("Module::parse()\n"); char *srcname = srcfile->name->toChars(); //printf("Module::parse(srcname = '%s')\n", srcname); unsigned char *buf = srcfile->buffer; size_t buflen = srcfile->len; if (buflen >= 2) { /* Convert all non-UTF-8 formats to UTF-8. * BOM : http://www.unicode.org/faq/utf_bom.html * 00 00 FE FF UTF-32BE, big-endian * FF FE 00 00 UTF-32LE, little-endian * FE FF UTF-16BE, big-endian * FF FE UTF-16LE, little-endian * EF BB BF UTF-8 */ unsigned le; unsigned bom = 1; // assume there's a BOM if (buf[0] == 0xFF && buf[1] == 0xFE) { if (buflen >= 4 && buf[2] == 0 && buf[3] == 0) { // UTF-32LE le = 1; Lutf32: OutBuffer dbuf; unsigned *pu = (unsigned *)(buf); unsigned *pumax = &pu[buflen / 4]; if (buflen & 3) { error("odd length of UTF-32 char source %u", buflen); fatal(); } dbuf.reserve(buflen / 4); for (pu += bom; pu < pumax; pu++) { unsigned u; u = le ? readlongLE(pu) : readlongBE(pu); if (u & ~0x7F) { if (u > 0x10FFFF) { error("UTF-32 value %08x greater than 0x10FFFF", u); fatal(); } dbuf.writeUTF8(u); } else dbuf.writeByte(u); } dbuf.writeByte(0); // add 0 as sentinel for scanner buflen = dbuf.offset - 1; // don't include sentinel in count buf = (unsigned char *) dbuf.extractData(); } else { // UTF-16LE (X86) // Convert it to UTF-8 le = 1; Lutf16: OutBuffer dbuf; unsigned short *pu = (unsigned short *)(buf); unsigned short *pumax = &pu[buflen / 2]; if (buflen & 1) { error("odd length of UTF-16 char source %u", buflen); fatal(); } dbuf.reserve(buflen / 2); for (pu += bom; pu < pumax; pu++) { unsigned u; u = le ? readwordLE(pu) : readwordBE(pu); if (u & ~0x7F) { if (u >= 0xD800 && u <= 0xDBFF) { unsigned u2; if (++pu > pumax) { error("surrogate UTF-16 high value %04x at EOF", u); fatal(); } u2 = le ? 
readwordLE(pu) : readwordBE(pu); if (u2 < 0xDC00 || u2 > 0xDFFF) { error("surrogate UTF-16 low value %04x out of range", u2); fatal(); } u = (u - 0xD7C0) << 10; u |= (u2 - 0xDC00); } else if (u >= 0xDC00 && u <= 0xDFFF) { error("unpaired surrogate UTF-16 value %04x", u); fatal(); } else if (u == 0xFFFE || u == 0xFFFF) { error("illegal UTF-16 value %04x", u); fatal(); } dbuf.writeUTF8(u); } else dbuf.writeByte(u); } dbuf.writeByte(0); // add 0 as sentinel for scanner buflen = dbuf.offset - 1; // don't include sentinel in count buf = (unsigned char *) dbuf.extractData(); } } else if (buf[0] == 0xFE && buf[1] == 0xFF) { // UTF-16BE le = 0; goto Lutf16; } else if (buflen >= 4 && buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF) { // UTF-32BE le = 0; goto Lutf32; } else if (buflen >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) { // UTF-8 buf += 3; buflen -= 3; } else { /* There is no BOM. Make use of Arcane Jill's insight that * the first char of D source must be ASCII to * figure out the encoding. */ bom = 0; if (buflen >= 4) { if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0) { // UTF-32LE le = 1; goto Lutf32; } else if (buf[0] == 0 && buf[1] == 0 && buf[2] == 0) { // UTF-32BE le = 0; goto Lutf32; } } if (buflen >= 2) { if (buf[1] == 0) { // UTF-16LE le = 1; goto Lutf16; } else if (buf[0] == 0) { // UTF-16BE le = 0; goto Lutf16; } } // It's UTF-8 if (buf[0] >= 0x80) { error("source file must start with BOM or ASCII character, not \\x%02X", buf[0]); fatal(); } } } /* If it starts with the string "Ddoc", then it's a documentation * source file. */ if (buflen >= 4 && memcmp(buf, "Ddoc", 4) == 0) { comment = buf + 4; isDocFile = 1; if (!docfile) setDocfile(); return; } Parser p(this, buf, buflen, docfile != NULL); p.nextToken(); members = p.parseModule(); if (srcfile->ref == 0) ::free(srcfile->buffer); srcfile->buffer = NULL; srcfile->len = 0; md = p.md; numlines = p.loc.linnum; /* The symbol table into which the module is to be inserted. 
*/ DsymbolTable *dst; if (md) { /* A ModuleDeclaration, md, was provided. * The ModuleDeclaration sets the packages this module appears in, and * the name of this module. */ this->ident = md->id; this->safe = md->safe; Package *ppack = NULL; dst = Package::resolve(md->packages, &this->parent, &ppack); #if 0 if (ppack && ppack->isModule()) { error(loc, "package name '%s' in file %s conflicts with usage as a module name in file %s", ppack->toChars(), srcname, ppack->isModule()->srcfile->toChars()); dst = modules; } #endif } else { /* The name of the module is set to the source file name. * There are no packages. */ dst = modules; // and so this module goes into global module symbol table /* Check to see if module name is a valid identifier */ if (!Lexer::isValidIdentifier(this->ident->toChars())) error("has non-identifier characters in filename, use module declaration instead"); } // Insert module into the symbol table Dsymbol *s = this; bool isPackageMod = strcmp(srcfile->name->name(), "package.d") == 0; if (isPackageMod) { /* If the source tree is as follows: * pkg/ * +- package.d * +- common.d * the 'pkg' will be incorporated to the internal package tree in two ways: * import pkg; * and: * import pkg.common; * * If both are used in one compilation, 'pkg' as a module (== pkg/package.d) * and a package name 'pkg' will conflict each other. * * To avoid the confliction, * 1. If preceding package name insertion had occurred by Package::resolve, * later package.d loading will change Package::isPkgMod to PKGmodule and set Package::mod. * 2. Otherwise, 'package.d' wrapped by 'Package' is inserted to the internal tree in here. */ Package *p = new Package(ident); p->isPkgMod = PKGmodule; p->mod = this; p->symtab = new DsymbolTable(); s = p; } if (!dst->insert(s)) { /* It conflicts with a name that is already in the symbol table. * Figure out what went wrong, and issue error message. 
*/ Dsymbol *prev = dst->lookup(ident); assert(prev); Module *mprev = prev->isModule(); if (mprev) { if (strcmp(srcname, mprev->srcfile->toChars()) == 0) error(loc, "from file %s must be imported as module '%s'", srcname, toPrettyChars()); else error(loc, "from file %s conflicts with another module %s from file %s", srcname, mprev->toChars(), mprev->srcfile->toChars()); } else { Package *pkg = prev->isPackage(); assert(pkg); if (pkg->isPkgMod == PKGunknown && isPackageMod) { /* If the previous inserted Package is not yet determined as package.d, * link it to the actual module. */ pkg->isPkgMod = PKGmodule; pkg->mod = this; } else error(pkg->loc, "from file %s conflicts with package name %s", srcname, pkg->toChars()); } } else { // Add to global array of all modules amodules.push(this); } }
/**
 * Construct a module for the file named by `filename`.
 *
 * All bookkeeping members are reset to 0 / NULL / FALSE, then the File
 * objects for the source, object and symbol files are created.
 *
 * Params:
 *      filename     = name the module is created from; stored in `arg`
 *      ident        = identifier forwarded to the Package base constructor
 *      doDocComment = non-zero: call setDocfile()
 *      doHdrGen     = non-zero: call setHdrfile()
 *
 * Reports an error and calls fatal() when the source file name (after
 * applying the default extension) has none of the extensions
 * global.mars_ext, global.hdr_ext or "dd".
 */
Module::Module(char *filename, Identifier *ident, int doDocComment, int doHdrGen)
        : Package(ident)
{
//    printf("Module::Module(filename = '%s', ident = '%s')\n", filename, ident->toChars());
    this->arg = filename;

    // --- counters and flags ---
    errors = 0;
    numlines = 0;
    isDocFile = 0;
    needmoduleinfo = 0;
#ifdef IN_GCC
    strictlyneedmoduleinfo = 0;
#endif
    selfimports = 0;
    insearch = 0;
    semanticstarted = 0;
    semanticRun = 0;
    root = 0;
    safe = FALSE;
    doppelganger = 0;

    // --- declaration / member data ---
    md = NULL;
    members = NULL;
    decldefs = NULL;
    importedFrom = NULL;

    // --- search cache ---
    searchCacheIdent = NULL;
    searchCacheSymbol = NULL;
    searchCacheFlags = 0;

    // --- back-end symbols ---
    vmoduleinfo = NULL;
    massert = NULL;
    munittest = NULL;
    marray = NULL;
    sictor = NULL;
    sctor = NULL;
    sdtor = NULL;
    ssharedctor = NULL;
    sshareddtor = NULL;
    stest = NULL;
    sfilename = NULL;

    // --- associated files ---
    srcfile = NULL;
    docfile = NULL;

    // --- debug / version conditions ---
    debuglevel = 0;
    debugids = NULL;
    debugidsNot = NULL;
    versionlevel = 0;
    versionids = NULL;
    versionidsNot = NULL;

    // --- documentation macros ---
    macrotable = NULL;
    escapetable = NULL;

    // --- coverage data and name bookkeeping ---
    cov = NULL;
    covb = NULL;
    nameoffset = 0;
    namelen = 0;

    // Source file: apply the default extension, then insist on a known one.
    FileName *sourceName = FileName::defaultExt(filename, global.mars_ext);
    if (!(sourceName->equalsExt(global.mars_ext) ||
          sourceName->equalsExt(global.hdr_ext) ||
          sourceName->equalsExt("dd")))
    {
        error("source file name '%s' must have .%s extension", sourceName->toChars(), global.mars_ext);
        fatal();
    }

    // Object file: an explicit objname is used verbatim. Otherwise the name
    // is derived from `filename`, placed under objdir unless already
    // absolute, and given the object extension.
    // (Bugzilla 3547: objdir is not applied to an explicit objname.)
    char *objPath = global.params.objname;
    FileName *objectName;
    if (objPath)
    {
        objectName = new FileName(objPath);
    }
    else
    {
        objPath = global.params.preservePaths ? filename : FileName::name(filename);
        if (!FileName::absolute(objPath))
        {
            objPath = FileName::combine(global.params.objdir, objPath);
        }
        objectName = FileName::forceExt(objPath, global.obj_ext);
    }

    FileName *symbolName = FileName::forceExt(filename, global.sym_ext);

    srcfile = new File(sourceName);

    if (doDocComment)
    {
        setDocfile();
    }

    if (doHdrGen)
    {
        setHdrfile();
    }

    objfile = new File(objectName);
    symfile = new File(symbolName);
}
/**
 * Construct a module for the file named by `filename`.
 *
 * All bookkeeping members are reset to 0 / NULL / false, then the source
 * File is created and the output files are set up through setOutfile()
 * and setDocfile().
 *
 * Params:
 *      filename     = name the module is created from; stored in `arg`
 *      ident        = identifier forwarded to the Package base constructor
 *      doDocComment = non-zero: call setDocfile()
 *      doHdrGen     = non-zero: set `hdrfile` via setOutfile()
 *
 * Reports an error and calls fatal() when the source file name (after
 * applying the default extension) has none of the extensions
 * global.mars_ext, global.hdr_ext or "dd", unless the extension-less
 * "run" case below applies.
 */
Module::Module(const char *filename, Identifier *ident, int doDocComment, int doHdrGen)
        : Package(ident)
{
//    printf("Module::Module(filename = '%s', ident = '%s')\n", filename, ident->toChars());
    this->arg = filename;

    // --- counters and flags ---
    errors = 0;
    numlines = 0;
    isDocFile = 0;
    isPackageFile = false;
    needmoduleinfo = 0;
    selfimports = 0;
    rootimports = 0;
    insearch = 0;
    doppelganger = 0;

    // --- declaration / member data ---
    md = NULL;
    members = NULL;
    decldefs = NULL;
    importedFrom = NULL;

    // --- search cache ---
    searchCacheIdent = NULL;
    searchCacheSymbol = NULL;
    searchCacheFlags = 0;

    // --- back-end symbols ---
    massert = NULL;
    munittest = NULL;
    marray = NULL;
    sictor = NULL;
    sctor = NULL;
    sdtor = NULL;
    ssharedctor = NULL;
    sshareddtor = NULL;
    stest = NULL;
    sfilename = NULL;

    // --- associated files ---
    srcfile = NULL;
    docfile = NULL;

    // --- debug / version conditions ---
    debuglevel = 0;
    debugids = NULL;
    debugidsNot = NULL;
    versionlevel = 0;
    versionids = NULL;
    versionidsNot = NULL;

    // --- documentation macros ---
    macrotable = NULL;
    escapetable = NULL;

    // --- coverage data and name bookkeeping ---
    cov = NULL;
    covb = NULL;
    nameoffset = 0;
    namelen = 0;

    const char *sourcePath = FileName::defaultExt(filename, global.mars_ext);

    // With run_noext and "run" both set, a file given without an extension is
    // used as named when it exists and the default-extension name does not.
    const bool useBareName = global.run_noext && global.params.run &&
                             !FileName::ext(filename) &&
                             FileName::exists(sourcePath) == 0 &&
                             FileName::exists(filename) == 1;
    if (useBareName)
    {
        FileName::free(sourcePath);
        sourcePath = FileName::removeExt(filename);     // just does a mem.strdup(filename)
    }
    else if (!(FileName::equalsExt(sourcePath, global.mars_ext) ||
               FileName::equalsExt(sourcePath, global.hdr_ext) ||
               FileName::equalsExt(sourcePath, "dd")))
    {
        error("source file name '%s' must have .%s extension", sourcePath, global.mars_ext);
        fatal();
    }

    srcfile = new File(sourcePath);
    objfile = setOutfile(global.params.objname, global.params.objdir, filename, global.obj_ext);

    if (doDocComment)
    {
        setDocfile();
    }

    if (doHdrGen)
    {
        hdrfile = setOutfile(global.params.hdrname, global.params.hdrdir, arg, global.hdr_ext);
    }
}
void Module::parse() { //printf("Module::parse()\n"); char *srcname = srcfile->name->toChars(); //printf("Module::parse(srcname = '%s')\n", srcname); unsigned char *buf = srcfile->buffer; unsigned buflen = srcfile->len; if (buflen >= 2) { /* Convert all non-UTF-8 formats to UTF-8. * BOM : http://www.unicode.org/faq/utf_bom.html * 00 00 FE FF UTF-32BE, big-endian * FF FE 00 00 UTF-32LE, little-endian * FE FF UTF-16BE, big-endian * FF FE UTF-16LE, little-endian * EF BB BF UTF-8 */ unsigned le; unsigned bom = 1; // assume there's a BOM if (buf[0] == 0xFF && buf[1] == 0xFE) { if (buflen >= 4 && buf[2] == 0 && buf[3] == 0) { // UTF-32LE le = 1; Lutf32: OutBuffer dbuf; unsigned *pu = (unsigned *)(buf); unsigned *pumax = &pu[buflen / 4]; if (buflen & 3) { error("odd length of UTF-32 char source %u", buflen); fatal(); } dbuf.reserve(buflen / 4); for (pu += bom; pu < pumax; pu++) { unsigned u; u = le ? readlongLE(pu) : readlongBE(pu); if (u & ~0x7F) { if (u > 0x10FFFF) { error("UTF-32 value %08x greater than 0x10FFFF", u); fatal(); } dbuf.writeUTF8(u); } else dbuf.writeByte(u); } dbuf.writeByte(0); // add 0 as sentinel for scanner buflen = dbuf.offset - 1; // don't include sentinel in count buf = (unsigned char *) dbuf.extractData(); } else { // UTF-16LE (X86) // Convert it to UTF-8 le = 1; Lutf16: OutBuffer dbuf; unsigned short *pu = (unsigned short *)(buf); unsigned short *pumax = &pu[buflen / 2]; if (buflen & 1) { error("odd length of UTF-16 char source %u", buflen); fatal(); } dbuf.reserve(buflen / 2); for (pu += bom; pu < pumax; pu++) { unsigned u; u = le ? readwordLE(pu) : readwordBE(pu); if (u & ~0x7F) { if (u >= 0xD800 && u <= 0xDBFF) { unsigned u2; if (++pu > pumax) { error("surrogate UTF-16 high value %04x at EOF", u); fatal(); } u2 = le ? 
readwordLE(pu) : readwordBE(pu); if (u2 < 0xDC00 || u2 > 0xDFFF) { error("surrogate UTF-16 low value %04x out of range", u2); fatal(); } u = (u - 0xD7C0) << 10; u |= (u2 - 0xDC00); } else if (u >= 0xDC00 && u <= 0xDFFF) { error("unpaired surrogate UTF-16 value %04x", u); fatal(); } else if (u == 0xFFFE || u == 0xFFFF) { error("illegal UTF-16 value %04x", u); fatal(); } dbuf.writeUTF8(u); } else dbuf.writeByte(u); } dbuf.writeByte(0); // add 0 as sentinel for scanner buflen = dbuf.offset - 1; // don't include sentinel in count buf = (unsigned char *) dbuf.extractData(); } } else if (buf[0] == 0xFE && buf[1] == 0xFF) { // UTF-16BE le = 0; goto Lutf16; } else if (buflen >= 4 && buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF) { // UTF-32BE le = 0; goto Lutf32; } else if (buflen >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) { // UTF-8 buf += 3; buflen -= 3; } else { /* There is no BOM. Make use of Arcane Jill's insight that * the first char of D source must be ASCII to * figure out the encoding. */ bom = 0; if (buflen >= 4) { if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0) { // UTF-32LE le = 1; goto Lutf32; } else if (buf[0] == 0 && buf[1] == 0 && buf[2] == 0) { // UTF-32BE le = 0; goto Lutf32; } } if (buflen >= 2) { if (buf[1] == 0) { // UTF-16LE le = 1; goto Lutf16; } else if (buf[0] == 0) { // UTF-16BE le = 0; goto Lutf16; } } // It's UTF-8 if (buf[0] >= 0x80) { error("source file must start with BOM or ASCII character, not \\x%02X", buf[0]); fatal(); } } } #ifdef IN_GCC // dump utf-8 encoded source if (global.params.dump_source) { // %% srcname could contain a path ... d_gcc_dump_source(srcname, "utf-8", buf, buflen); } #endif /* If it starts with the string "Ddoc", then it's a documentation * source file. 
*/ if (buflen >= 4 && memcmp(buf, "Ddoc", 4) == 0) { comment = buf + 4; isDocFile = 1; if (!docfile) setDocfile(); return; } Parser p(this, buf, buflen, docfile != NULL); p.nextToken(); members = p.parseModule(); ::free(srcfile->buffer); srcfile->buffer = NULL; srcfile->len = 0; md = p.md; numlines = p.loc.linnum; DsymbolTable *dst; if (md) { this->ident = md->id; this->safe = md->safe; Package *ppack = NULL; dst = Package::resolve(md->packages, &this->parent, &ppack); if (ppack && ppack->isModule()) { error(loc, "package name '%s' in file %s conflicts with usage as a module name in file %s", ppack->toChars(), srcname, ppack->isModule()->srcfile->toChars()); dst = modules; } } else { dst = modules; /* Check to see if module name is a valid identifier */ if (!Lexer::isValidIdentifier(this->ident->toChars())) error("has non-identifier characters in filename, use module declaration instead"); } // Update global list of modules if (!dst->insert(this)) { Dsymbol *prev = dst->lookup(ident); assert(prev); Module *mprev = prev->isModule(); if (mprev) error(loc, "from file %s conflicts with another module %s from file %s", srcname, mprev->toChars(), mprev->srcfile->toChars()); else { Package *pkg = prev->isPackage(); assert(pkg); error(pkg->loc, "from file %s conflicts with package name %s", srcname, pkg->toChars()); } } else { amodules.push(this); } }