Пример #1
0
void Module::parse()
{
    //printf("Module::parse()\n");

    char *srcname = srcfile->name->toChars();
    //printf("Module::parse(srcname = '%s')\n", srcname);

    unsigned char *buf = srcfile->buffer;
    size_t buflen = srcfile->len;

    if (buflen >= 2)
    {
        /* Convert all non-UTF-8 formats to UTF-8.
         * BOM : http://www.unicode.org/faq/utf_bom.html
         * 00 00 FE FF  UTF-32BE, big-endian
         * FF FE 00 00  UTF-32LE, little-endian
         * FE FF        UTF-16BE, big-endian
         * FF FE        UTF-16LE, little-endian
         * EF BB BF     UTF-8
         */

        unsigned le;
        unsigned bom = 1;                // assume there's a BOM
        if (buf[0] == 0xFF && buf[1] == 0xFE)
        {
            if (buflen >= 4 && buf[2] == 0 && buf[3] == 0)
            {   // UTF-32LE
                le = 1;

            Lutf32:
                OutBuffer dbuf;
                unsigned *pu = (unsigned *)(buf);
                unsigned *pumax = &pu[buflen / 4];

                if (buflen & 3)
                {   error("odd length of UTF-32 char source %u", buflen);
                    fatal();
                }

                dbuf.reserve(buflen / 4);
                for (pu += bom; pu < pumax; pu++)
                {   unsigned u;

                    u = le ? readlongLE(pu) : readlongBE(pu);
                    if (u & ~0x7F)
                    {
                        if (u > 0x10FFFF)
                        {   error("UTF-32 value %08x greater than 0x10FFFF", u);
                            fatal();
                        }
                        dbuf.writeUTF8(u);
                    }
                    else
                        dbuf.writeByte(u);
                }
                dbuf.writeByte(0);              // add 0 as sentinel for scanner
                buflen = dbuf.offset - 1;       // don't include sentinel in count
                buf = (unsigned char *) dbuf.extractData();
            }
            else
            {   // UTF-16LE (X86)
                // Convert it to UTF-8
                le = 1;

            Lutf16:
                OutBuffer dbuf;
                unsigned short *pu = (unsigned short *)(buf);
                unsigned short *pumax = &pu[buflen / 2];

                if (buflen & 1)
                {   error("odd length of UTF-16 char source %u", buflen);
                    fatal();
                }

                dbuf.reserve(buflen / 2);
                for (pu += bom; pu < pumax; pu++)
                {   unsigned u;

                    u = le ? readwordLE(pu) : readwordBE(pu);
                    if (u & ~0x7F)
                    {   if (u >= 0xD800 && u <= 0xDBFF)
                        {   unsigned u2;

                            if (++pu > pumax)
                            {   error("surrogate UTF-16 high value %04x at EOF", u);
                                fatal();
                            }
                            u2 = le ? readwordLE(pu) : readwordBE(pu);
                            if (u2 < 0xDC00 || u2 > 0xDFFF)
                            {   error("surrogate UTF-16 low value %04x out of range", u2);
                                fatal();
                            }
                            u = (u - 0xD7C0) << 10;
                            u |= (u2 - 0xDC00);
                        }
                        else if (u >= 0xDC00 && u <= 0xDFFF)
                        {   error("unpaired surrogate UTF-16 value %04x", u);
                            fatal();
                        }
                        else if (u == 0xFFFE || u == 0xFFFF)
                        {   error("illegal UTF-16 value %04x", u);
                            fatal();
                        }
                        dbuf.writeUTF8(u);
                    }
                    else
                        dbuf.writeByte(u);
                }
                dbuf.writeByte(0);              // add 0 as sentinel for scanner
                buflen = dbuf.offset - 1;       // don't include sentinel in count
                buf = (unsigned char *) dbuf.extractData();
            }
        }
        else if (buf[0] == 0xFE && buf[1] == 0xFF)
        {   // UTF-16BE
            le = 0;
            goto Lutf16;
        }
        else if (buflen >= 4 && buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)
        {   // UTF-32BE
            le = 0;
            goto Lutf32;
        }
        else if (buflen >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF)
        {   // UTF-8

            buf += 3;
            buflen -= 3;
        }
        else
        {
            /* There is no BOM. Make use of Arcane Jill's insight that
             * the first char of D source must be ASCII to
             * figure out the encoding.
             */

            bom = 0;
            if (buflen >= 4)
            {   if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0)
                {   // UTF-32LE
                    le = 1;
                    goto Lutf32;
                }
                else if (buf[0] == 0 && buf[1] == 0 && buf[2] == 0)
                {   // UTF-32BE
                    le = 0;
                    goto Lutf32;
                }
            }
            if (buflen >= 2)
            {
                if (buf[1] == 0)
                {   // UTF-16LE
                    le = 1;
                    goto Lutf16;
                }
                else if (buf[0] == 0)
                {   // UTF-16BE
                    le = 0;
                    goto Lutf16;
                }
            }

            // It's UTF-8
            if (buf[0] >= 0x80)
            {   error("source file must start with BOM or ASCII character, not \\x%02X", buf[0]);
                fatal();
            }
        }
    }

    /* If it starts with the string "Ddoc", then it's a documentation
     * source file.
     */
    if (buflen >= 4 && memcmp(buf, "Ddoc", 4) == 0)
    {
        comment = buf + 4;
        isDocFile = 1;
        if (!docfile)
            setDocfile();
        return;
    }
    Parser p(this, buf, buflen, docfile != NULL);
    p.nextToken();
    members = p.parseModule();

    if (srcfile->ref == 0)
        ::free(srcfile->buffer);
    srcfile->buffer = NULL;
    srcfile->len = 0;

    md = p.md;
    numlines = p.loc.linnum;

    /* The symbol table into which the module is to be inserted.
     */
    DsymbolTable *dst;

    if (md)
    {   /* A ModuleDeclaration, md, was provided.
         * The ModuleDeclaration sets the packages this module appears in, and
         * the name of this module.
         */
        this->ident = md->id;
        this->safe = md->safe;
        Package *ppack = NULL;
        dst = Package::resolve(md->packages, &this->parent, &ppack);
#if 0
        if (ppack && ppack->isModule())
        {
            error(loc, "package name '%s' in file %s conflicts with usage as a module name in file %s",
                ppack->toChars(), srcname, ppack->isModule()->srcfile->toChars());
            dst = modules;
        }
#endif
    }
    else
    {   /* The name of the module is set to the source file name.
         * There are no packages.
         */
        dst = modules;          // and so this module goes into global module symbol table

        /* Check to see if module name is a valid identifier
         */
        if (!Lexer::isValidIdentifier(this->ident->toChars()))
            error("has non-identifier characters in filename, use module declaration instead");
    }

    // Insert module into the symbol table
    Dsymbol *s = this;
    bool isPackageMod = strcmp(srcfile->name->name(), "package.d") == 0;
    if (isPackageMod)
    {
        /* If the source tree is as follows:
         *     pkg/
         *     +- package.d
         *     +- common.d
         * the 'pkg' will be incorporated to the internal package tree in two ways:
         *     import pkg;
         * and:
         *     import pkg.common;
         *
         * If both are used in one compilation, 'pkg' as a module (== pkg/package.d)
         * and a package name 'pkg' will conflict each other.
         *
         * To avoid the confliction,
         * 1. If preceding package name insertion had occurred by Package::resolve,
         *    later package.d loading will change Package::isPkgMod to PKGmodule and set Package::mod.
         * 2. Otherwise, 'package.d' wrapped by 'Package' is inserted to the internal tree in here.
         */
        Package *p = new Package(ident);
        p->isPkgMod = PKGmodule;
        p->mod = this;
        p->symtab = new DsymbolTable();
        s = p;
    }
    if (!dst->insert(s))
    {
        /* It conflicts with a name that is already in the symbol table.
         * Figure out what went wrong, and issue error message.
         */
        Dsymbol *prev = dst->lookup(ident);
        assert(prev);
        Module *mprev = prev->isModule();
        if (mprev)
        {
            if (strcmp(srcname, mprev->srcfile->toChars()) == 0)
                error(loc, "from file %s must be imported as module '%s'",
                    srcname, toPrettyChars());
            else
                error(loc, "from file %s conflicts with another module %s from file %s",
                    srcname, mprev->toChars(), mprev->srcfile->toChars());
        }
        else
        {
            Package *pkg = prev->isPackage();
            assert(pkg);
            if (pkg->isPkgMod == PKGunknown && isPackageMod)
            {
                /* If the previous inserted Package is not yet determined as package.d,
                 * link it to the actual module.
                 */
                pkg->isPkgMod = PKGmodule;
                pkg->mod = this;
            }
            else
                error(pkg->loc, "from file %s conflicts with package name %s",
                    srcname, pkg->toChars());
        }
    }
    else
    {
        // Add to global array of all modules
        amodules.push(this);
    }
}
Пример #2
0
Module::Module(char *filename, Identifier *ident, int doDocComment, int doHdrGen)
        : Package(ident)
{
    FileName *srcfilename;
    FileName *objfilename;
    FileName *symfilename;

//    printf("Module::Module(filename = '%s', ident = '%s')\n", filename, ident->toChars());
    this->arg = filename;
    md = NULL;
    errors = 0;
    numlines = 0;
    members = NULL;
    isDocFile = 0;
    needmoduleinfo = 0;
#ifdef IN_GCC
    strictlyneedmoduleinfo = 0;
#endif
    selfimports = 0;
    insearch = 0;
    searchCacheIdent = NULL;
    searchCacheSymbol = NULL;
    searchCacheFlags = 0;
    semanticstarted = 0;
    semanticRun = 0;
    decldefs = NULL;
    vmoduleinfo = NULL;
    massert = NULL;
    munittest = NULL;
    marray = NULL;
    sictor = NULL;
    sctor = NULL;
    sdtor = NULL;
    ssharedctor = NULL;
    sshareddtor = NULL;
    stest = NULL;
    sfilename = NULL;
    root = 0;
    importedFrom = NULL;
    srcfile = NULL;
    docfile = NULL;

    debuglevel = 0;
    debugids = NULL;
    debugidsNot = NULL;
    versionlevel = 0;
    versionids = NULL;
    versionidsNot = NULL;

    macrotable = NULL;
    escapetable = NULL;
    safe = FALSE;
    doppelganger = 0;
    cov = NULL;
    covb = NULL;

    nameoffset = 0;
    namelen = 0;

    srcfilename = FileName::defaultExt(filename, global.mars_ext);
    if (!srcfilename->equalsExt(global.mars_ext) &&
        !srcfilename->equalsExt(global.hdr_ext) &&
        !srcfilename->equalsExt("dd"))
    {
        error("source file name '%s' must have .%s extension", srcfilename->toChars(), global.mars_ext);
        fatal();
    }

    char *argobj;
    if (global.params.objname)
        argobj = global.params.objname;
#if 0
    else if (global.params.preservePaths)
        argobj = filename;
    else
        argobj = FileName::name(filename);
    if (!FileName::absolute(argobj))
    {
        argobj = FileName::combine(global.params.objdir, argobj);
    }
#else // Bugzilla 3547
    else
    {
        if (global.params.preservePaths)
            argobj = filename;
        else
            argobj = FileName::name(filename);
        if (!FileName::absolute(argobj))
        {
            argobj = FileName::combine(global.params.objdir, argobj);
        }
    }
#endif

    if (global.params.objname)
        objfilename = new FileName(argobj);
    else
        objfilename = FileName::forceExt(argobj, global.obj_ext);

    symfilename = FileName::forceExt(filename, global.sym_ext);

    srcfile = new File(srcfilename);

    if (doDocComment)
    {
        setDocfile();
    }

    if (doHdrGen)
    {
        setHdrfile();
    }

    objfile = new File(objfilename);
    symfile = new File(symfilename);
}
Пример #3
0
Module::Module(const char *filename, Identifier *ident, int doDocComment, int doHdrGen)
        : Package(ident)
{
    const char *srcfilename;

//    printf("Module::Module(filename = '%s', ident = '%s')\n", filename, ident->toChars());
    this->arg = filename;
    md = NULL;
    errors = 0;
    numlines = 0;
    members = NULL;
    isDocFile = 0;
    isPackageFile = false;
    needmoduleinfo = 0;
    selfimports = 0;
    rootimports = 0;
    insearch = 0;
    searchCacheIdent = NULL;
    searchCacheSymbol = NULL;
    searchCacheFlags = 0;
    decldefs = NULL;
    massert = NULL;
    munittest = NULL;
    marray = NULL;
    sictor = NULL;
    sctor = NULL;
    sdtor = NULL;
    ssharedctor = NULL;
    sshareddtor = NULL;
    stest = NULL;
    sfilename = NULL;
    importedFrom = NULL;
    srcfile = NULL;
    docfile = NULL;

    debuglevel = 0;
    debugids = NULL;
    debugidsNot = NULL;
    versionlevel = 0;
    versionids = NULL;
    versionidsNot = NULL;

    macrotable = NULL;
    escapetable = NULL;
    doppelganger = 0;
    cov = NULL;
    covb = NULL;

    nameoffset = 0;
    namelen = 0;

    srcfilename = FileName::defaultExt(filename, global.mars_ext);

    if (global.run_noext && global.params.run &&
        !FileName::ext(filename) &&
        FileName::exists(srcfilename) == 0 &&
        FileName::exists(filename) == 1)
    {
        FileName::free(srcfilename);
        srcfilename = FileName::removeExt(filename);    // just does a mem.strdup(filename)
    }
    else if (!FileName::equalsExt(srcfilename, global.mars_ext) &&
        !FileName::equalsExt(srcfilename, global.hdr_ext) &&
        !FileName::equalsExt(srcfilename, "dd"))
    {
        error("source file name '%s' must have .%s extension", srcfilename, global.mars_ext);
        fatal();
    }
    srcfile = new File(srcfilename);

    objfile = setOutfile(global.params.objname, global.params.objdir, filename, global.obj_ext);

    if (doDocComment)
        setDocfile();

    if (doHdrGen)
        hdrfile = setOutfile(global.params.hdrname, global.params.hdrdir, arg, global.hdr_ext);

    //objfile = new File(objfilename);
}
Пример #4
0
void Module::parse()
{
    //printf("Module::parse()\n");

    char *srcname = srcfile->name->toChars();
    //printf("Module::parse(srcname = '%s')\n", srcname);

    unsigned char *buf = srcfile->buffer;
    unsigned buflen = srcfile->len;

    if (buflen >= 2)
    {
        /* Convert all non-UTF-8 formats to UTF-8.
         * BOM : http://www.unicode.org/faq/utf_bom.html
         * 00 00 FE FF  UTF-32BE, big-endian
         * FF FE 00 00  UTF-32LE, little-endian
         * FE FF        UTF-16BE, big-endian
         * FF FE        UTF-16LE, little-endian
         * EF BB BF     UTF-8
         */

        unsigned le;
        unsigned bom = 1;                // assume there's a BOM
        if (buf[0] == 0xFF && buf[1] == 0xFE)
        {
            if (buflen >= 4 && buf[2] == 0 && buf[3] == 0)
            {   // UTF-32LE
                le = 1;

            Lutf32:
                OutBuffer dbuf;
                unsigned *pu = (unsigned *)(buf);
                unsigned *pumax = &pu[buflen / 4];

                if (buflen & 3)
                {   error("odd length of UTF-32 char source %u", buflen);
                    fatal();
                }

                dbuf.reserve(buflen / 4);
                for (pu += bom; pu < pumax; pu++)
                {   unsigned u;

                    u = le ? readlongLE(pu) : readlongBE(pu);
                    if (u & ~0x7F)
                    {
                        if (u > 0x10FFFF)
                        {   error("UTF-32 value %08x greater than 0x10FFFF", u);
                            fatal();
                        }
                        dbuf.writeUTF8(u);
                    }
                    else
                        dbuf.writeByte(u);
                }
                dbuf.writeByte(0);              // add 0 as sentinel for scanner
                buflen = dbuf.offset - 1;       // don't include sentinel in count
                buf = (unsigned char *) dbuf.extractData();
            }
            else
            {   // UTF-16LE (X86)
                // Convert it to UTF-8
                le = 1;

            Lutf16:
                OutBuffer dbuf;
                unsigned short *pu = (unsigned short *)(buf);
                unsigned short *pumax = &pu[buflen / 2];

                if (buflen & 1)
                {   error("odd length of UTF-16 char source %u", buflen);
                    fatal();
                }

                dbuf.reserve(buflen / 2);
                for (pu += bom; pu < pumax; pu++)
                {   unsigned u;

                    u = le ? readwordLE(pu) : readwordBE(pu);
                    if (u & ~0x7F)
                    {   if (u >= 0xD800 && u <= 0xDBFF)
                        {   unsigned u2;

                            if (++pu > pumax)
                            {   error("surrogate UTF-16 high value %04x at EOF", u);
                                fatal();
                            }
                            u2 = le ? readwordLE(pu) : readwordBE(pu);
                            if (u2 < 0xDC00 || u2 > 0xDFFF)
                            {   error("surrogate UTF-16 low value %04x out of range", u2);
                                fatal();
                            }
                            u = (u - 0xD7C0) << 10;
                            u |= (u2 - 0xDC00);
                        }
                        else if (u >= 0xDC00 && u <= 0xDFFF)
                        {   error("unpaired surrogate UTF-16 value %04x", u);
                            fatal();
                        }
                        else if (u == 0xFFFE || u == 0xFFFF)
                        {   error("illegal UTF-16 value %04x", u);
                            fatal();
                        }
                        dbuf.writeUTF8(u);
                    }
                    else
                        dbuf.writeByte(u);
                }
                dbuf.writeByte(0);              // add 0 as sentinel for scanner
                buflen = dbuf.offset - 1;       // don't include sentinel in count
                buf = (unsigned char *) dbuf.extractData();
            }
        }
        else if (buf[0] == 0xFE && buf[1] == 0xFF)
        {   // UTF-16BE
            le = 0;
            goto Lutf16;
        }
        else if (buflen >= 4 && buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)
        {   // UTF-32BE
            le = 0;
            goto Lutf32;
        }
        else if (buflen >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF)
        {   // UTF-8

            buf += 3;
            buflen -= 3;
        }
        else
        {
            /* There is no BOM. Make use of Arcane Jill's insight that
             * the first char of D source must be ASCII to
             * figure out the encoding.
             */

            bom = 0;
            if (buflen >= 4)
            {   if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0)
                {   // UTF-32LE
                    le = 1;
                    goto Lutf32;
                }
                else if (buf[0] == 0 && buf[1] == 0 && buf[2] == 0)
                {   // UTF-32BE
                    le = 0;
                    goto Lutf32;
                }
            }
            if (buflen >= 2)
            {
                if (buf[1] == 0)
                {   // UTF-16LE
                    le = 1;
                    goto Lutf16;
                }
                else if (buf[0] == 0)
                {   // UTF-16BE
                    le = 0;
                    goto Lutf16;
                }
            }

            // It's UTF-8
            if (buf[0] >= 0x80)
            {   error("source file must start with BOM or ASCII character, not \\x%02X", buf[0]);
                fatal();
            }
        }
    }

#ifdef IN_GCC
    // dump utf-8 encoded source
    if (global.params.dump_source)
    {   // %% srcname could contain a path ...
        d_gcc_dump_source(srcname, "utf-8", buf, buflen);
    }
#endif

    /* If it starts with the string "Ddoc", then it's a documentation
     * source file.
     */
    if (buflen >= 4 && memcmp(buf, "Ddoc", 4) == 0)
    {
        comment = buf + 4;
        isDocFile = 1;
        if (!docfile)
            setDocfile();
        return;
    }
    Parser p(this, buf, buflen, docfile != NULL);
    p.nextToken();
    members = p.parseModule();

    ::free(srcfile->buffer);
    srcfile->buffer = NULL;
    srcfile->len = 0;

    md = p.md;
    numlines = p.loc.linnum;

    DsymbolTable *dst;

    if (md)
    {   this->ident = md->id;
        this->safe = md->safe;
        Package *ppack = NULL;
        dst = Package::resolve(md->packages, &this->parent, &ppack);
        if (ppack && ppack->isModule())
        {
            error(loc, "package name '%s' in file %s conflicts with usage as a module name in file %s",
                ppack->toChars(), srcname, ppack->isModule()->srcfile->toChars());
            dst = modules;
        }
    }
    else
    {
        dst = modules;

        /* Check to see if module name is a valid identifier
         */
        if (!Lexer::isValidIdentifier(this->ident->toChars()))
            error("has non-identifier characters in filename, use module declaration instead");
    }

    // Update global list of modules
    if (!dst->insert(this))
    {
        Dsymbol *prev = dst->lookup(ident);
        assert(prev);
        Module *mprev = prev->isModule();
        if (mprev)
            error(loc, "from file %s conflicts with another module %s from file %s",
                srcname, mprev->toChars(), mprev->srcfile->toChars());
        else
        {
            Package *pkg = prev->isPackage();
            assert(pkg);
            error(pkg->loc, "from file %s conflicts with package name %s",
                srcname, pkg->toChars());
        }
    }
    else
    {
        amodules.push(this);
    }
}