void PackTos::pack(OutputFile *fo)
{
    unsigned t;
    unsigned nrelocs = 0;
    unsigned relocsize = 0;
    unsigned overlay = 0;

    const unsigned i_text = ih.fh_text;
    const unsigned i_data = ih.fh_data;
    const unsigned i_sym = ih.fh_sym;
    const unsigned i_bss = ih.fh_bss;

    symbols.reset();
    symbols.need_reloc = false;
    // prepare symbols for buildLoader() - worst case
    symbols.loop1.init(65536 + 1);
    symbols.loop2.init((160 - 1) / 4);
    symbols.loop3.init(65536 + 1);
    symbols.up21_d4 = 65536 + 1;
    symbols.up21_a6 = 65536 + 1;
    symbols.up31_base_d4 = 65536 + 1;
    symbols.up31_base_a6 = 65536 + 1;

    // read file
    const unsigned isize = file_size - i_sym;
    ibuf.alloc(isize);
    fi->seek(FH_SIZE, SEEK_SET);
    // read text + data
    t = i_text + i_data;
    fi->readx(ibuf,t);
    // skip symbols
    if (i_sym && opt->exact)
        throwCantPackExact();
    fi->seek(i_sym,SEEK_CUR);
    // read relocations + overlay
    overlay = file_size - (FH_SIZE + i_text + i_data + i_sym);
    fi->readx(ibuf+t,overlay);

#if 0 || (TESTING)
    printf("text: %d, data: %d, sym: %d, bss: %d, flags=0x%x\n",
           i_text, i_data, i_sym, i_bss, (int)ih.fh_flag);
    printf("xx1 reloc: %d, overlay: %d, fixup: %d\n", relocsize, overlay, overlay >= 4 ? (int)get_be32(ibuf+t) : -1);
#endif

    // Check relocs (see load_and_reloc() in freemint/sys/memory.c).
    // Must work around TOS bugs and lots of broken programs.
    if (overlay < 4)
    {
        // Bug workaround: Whatever this is, silently keep it in
        // the (unused) relocations for byte-identical unpacking.
        relocsize = overlay;
        overlay = 0;
    }
    else if (get_be32(ibuf+t) == 0)
    {
        // Bug workaround - check the empty fixup before testing fh_reloc.
        relocsize = 4;
        overlay -= 4;
    }
    else if (ih.fh_reloc != 0)
        relocsize = 0;
    else {
        int r = check_relocs(ibuf+t, overlay, t, &nrelocs, &relocsize, &overlay);
        if (r != 0)
            throwCantPack("bad relocation table");
        symbols.need_reloc = true;
    }

#if 0 || (TESTING)
    printf("xx2: %d relocs: %d, overlay: %d, t: %d\n", nrelocs, relocsize, overlay, t);
#endif

    checkOverlay(overlay);

    // Append original fileheader.
    t += relocsize;
    ih.fh_sym = 0;                      // we stripped all symbols
    memcpy(ibuf+t, &ih, FH_SIZE);
    t += FH_SIZE;
#if 0 || (TESTING)
    printf("xx3 reloc: %d, overlay: %d, t: %d\n", relocsize, overlay, t);
#endif
    assert(t <= isize);

    // Now the data in ibuf[0..t] looks like this:
    //   text + data + relocs + original file header
    // After compression this will become the first part of the
    // data segement. The second part will be the decompressor.

    // alloc buffer (4096 is for decompressor and the various alignments)
    obuf.allocForCompression(t, 4096);

    // prepare packheader
    ph.u_len = t;
    // prepare filter
    Filter ft(ph.level);
    // compress (max_match = 65535)
    upx_compress_config_t cconf; cconf.reset();
    cconf.conf_ucl.max_match = 65535;
    cconf.conf_lzma.max_num_probs = 1846 + (768 << 4); // ushort: ~28 KiB stack
    compressWithFilters(&ft, 512, &cconf);

    //
    // multipass buildLoader()
    //

    // save initial loader
    const unsigned initial_lsize = getLoaderSize();
    unsigned last_lsize = initial_lsize;
    MemBuffer last_loader(last_lsize);
    memcpy(last_loader, getLoader(), last_lsize);

    unsigned o_text, o_data, o_bss;
    unsigned e_len, d_len, d_off;
    for (;;)
    {
        // The decompressed data will now get placed at this offset:
        unsigned offset = (ph.u_len + ph.overlap_overhead) - ph.c_len;

        // get loader
        const unsigned lsize = getLoaderSize();
        e_len = getLoaderSectionStart("CUTPOINT");
        d_len = lsize - e_len;
        assert((e_len & 3) == 0 && (d_len & 1) == 0);

        // compute section sizes
        o_text = e_len;
        o_data = ph.c_len;
        o_bss = i_bss;

        // word align len of compressed data
        while (o_data & 1)
        {
            obuf[o_data++] = 0;
            offset++;
        }

        // append decompressor (part 2 of loader)
        d_off = o_data;
        ////memcpy(obuf + d_off, getLoader() + e_len, d_len); // must be done after relocation
        o_data += d_len;

        // dword align the len of the final data segment
        while (o_data & 3)
        {
            obuf[o_data++] = 0;
            offset++;
        }
        // dword align offset
        while (offset & 3)
            offset++;

        // new bss
        if (i_text + i_data + i_bss > o_text + o_data + o_bss)
            o_bss = (i_text + i_data + i_bss) - (o_text + o_data);

        // dirty bss
        unsigned dirty_bss = (o_data + offset) - (i_text + i_data);
        //printf("real dirty_bss: %d\n", dirty_bss);
        // dword align (or 16 - for speedup when clearing the dirty bss)
        const unsigned dirty_bss_align = opt->small ? 4 : 16;
        while (dirty_bss & (dirty_bss_align - 1))
            dirty_bss++;
        // adjust bss, assert room for some stack
        unsigned stack = 512 + getDecompressorWrkmemSize();
        if (dirty_bss + stack > o_bss)
            o_bss = dirty_bss + stack;

        // dword align the len of the final bss segment
        while (o_bss & 3)
            o_bss++;

        // update symbols for buildLoader()
        if (opt->small)
        {
            symbols.loop1.init(o_data / 4);
            symbols.loop2.init(0);
        }
        else
        {
            symbols.loop1.init(o_data / 160);
            symbols.loop2.init((o_data % 160) / 4);
        }
        symbols.loop3.init(dirty_bss / dirty_bss_align);

        symbols.up21_d4 = o_data + offset;
        symbols.up31_base_d4 = d_off + offset;
        symbols.up21_a6 = symbols.up21_d4 - (i_text + i_data);
        symbols.up31_base_a6 = symbols.up31_base_d4 - (i_text + i_data);
        assert((int)symbols.up21_a6 > 0);
        assert((int)symbols.up31_base_a6 > 0);

        const unsigned c = linker->getSymbolOffset("code_on_stack");
        unsigned d;
        d = linker->getSymbolOffset("flush_cache_rts") - c;
        symbols.flush_cache_rts_offset = d;
        d = linker->getSymbolOffset("clear_dirty_stack_loop") - c;
        symbols.clear_dirty_stack_len = (d + 3) / 4 + 32 - 1;
        d = linker->getSymbolOffset("code_on_stack_end") - c;
        symbols.copy_to_stack_len = d / 2 - 1;

        // now re-build loader
        buildLoader(&ft);
        unsigned new_lsize = getLoaderSize();
        //printf("buildLoader %d %d\n", new_lsize, initial_lsize);
        assert(new_lsize <= initial_lsize);
        if (new_lsize == last_lsize && memcmp(getLoader(), last_loader, last_lsize) == 0)
            break;
        last_lsize = new_lsize;
        memcpy(last_loader, getLoader(), last_lsize);
    }

    //
    // define symbols and reloc
    //

    defineDecompressorSymbols();

    linker->defineSymbol("loop1_count", symbols.loop1.value);
    linker->defineSymbol("loop2_count", symbols.loop2.value);
    linker->defineSymbol("loop3_count", symbols.loop3.value);

    linker->defineSymbol("orig_p_tlen", i_text);
    linker->defineSymbol("orig_p_dlen", i_data);
    linker->defineSymbol("orig_p_blen", i_bss);

    if (symbols.up21_a6 <= 32767)
        linker->defineSymbol("up21_a6", symbols.up21_a6);
    else
        linker->defineSymbol("up21_d4", symbols.up21_d4);

    if (symbols.up31_a6 <= 32767)
        linker->defineSymbol("up31_a6", symbols.up31_a6);
    else if (symbols.up31_d4 <= 32767)
        linker->defineSymbol("up31_d4", symbols.up31_d4);
    else if (symbols.up31_a6 <= 65534)
        linker->defineSymbol("up31_a6", symbols.up31_a6 - 32767);
    else
        linker->defineSymbol("up31_d4", symbols.up31_d4);
#if 0
    printf("relocsize = %d\n", relocsize);
    printf("upx21(d4) = %d\n", symbols.up21_d4);
    printf("upx21(a6) = %d\n", symbols.up21_a6);
    printf("upx31(d4) = %d\n", symbols.up31_d4);
    printf("upx31(a6) = %d\n", symbols.up31_a6);
#endif

    linker->defineSymbol("flush_cache_rts_offset", symbols.flush_cache_rts_offset);
    linker->defineSymbol("copy_to_stack_len", symbols.copy_to_stack_len);
    linker->defineSymbol("clear_dirty_stack_len", symbols.clear_dirty_stack_len);

    relocateLoader();

    //
    // write
    //

    // set new file_hdr
    memcpy(&oh, &ih, FH_SIZE);
    if (opt->atari_tos.split_segments)
    {
        oh.fh_text = o_text;
        oh.fh_data = o_data;
    }
    else
    {
        // put everything into the text segment
        oh.fh_text = o_text + o_data;
        oh.fh_data = 0;
    }
    oh.fh_bss = o_bss;
    oh.fh_sym = 0;
    oh.fh_reserved = 0;
    // only keep the following flags:
    oh.fh_flag = ih.fh_flag & (F_FASTLOAD | F_ALTALLOC | F_SMALLTPA | F_ALLOCZERO | F_KEEP);
    // add an empty relocation fixup to workaround a bug in some TOS versions
    oh.fh_reloc = 0;

#if 0 || (TESTING)
    printf("old text: %6d, data: %6d, bss: %6d, reloc: %d, overlay: %d\n",
           i_text, i_data, i_bss, relocsize, overlay);
    printf("new text: %6d, data: %6d, bss: %6d, flag=0x%x\n",
           o_text, o_data, o_bss, (int)oh.fh_flag);
    linker->dumpSymbols();
#endif

    // prepare loader
    MemBuffer loader(o_text);
    memcpy(loader, getLoader(), o_text);
    patchPackHeader(loader, o_text);

    // write new file header, loader and compressed file
    fo->write(&oh, FH_SIZE);
    fo->write(loader, o_text);  // entry
    if (opt->debug.dump_stub_loader)
        OutputFile::dump(opt->debug.dump_stub_loader, loader, o_text);
    memcpy(obuf + d_off, getLoader() + e_len, d_len); // copy decompressor
    fo->write(obuf, o_data);    // compressed + decompressor

    // write empty relocation fixup
    fo->write("\x00\x00\x00\x00", 4);

    // verify
    verifyOverlappingDecompression();

    // copy the overlay
    copyOverlay(fo, overlay, &obuf);

    // finally check the compression ratio
    if (!checkFinalCompressionRatio(fo))
        throwNotCompressible();
}
void PackArmPe::pack(OutputFile *fo)
{
    // FIXME: we need to think about better support for --exact
    if (opt->exact)
        throwCantPackExact();

    const unsigned objs = ih.objects;
    isection = new pe_section_t[objs];
    fi->seek(pe_offset+sizeof(ih),SEEK_SET);
    fi->readx(isection,sizeof(pe_section_t)*objs);

    rvamin = isection[0].vaddr;

    infoHeader("[Processing %s, format %s, %d sections]", fn_basename(fi->getName()), getName(), objs);

    // check the PE header
    // FIXME: add more checks
    if (!opt->force && (
           (ih.cpu != 0x1c0 && ih.cpu != 0x1c2)
        || (ih.opthdrsize != 0xe0)
        || ((ih.flags & EXECUTABLE) == 0)
        || (ih.subsystem != 9)
        || (ih.entry == 0 /*&& !isdll*/)
        || (ih.ddirsentries != 16)
//        || IDSIZE(PEDIR_EXCEPTION) // is this used on arm?
//        || IDSIZE(PEDIR_COPYRIGHT)
       ))
        throwCantPack("unexpected value in PE header (try --force)");

    if (IDSIZE(PEDIR_SEC))
        IDSIZE(PEDIR_SEC) = IDADDR(PEDIR_SEC) = 0;
    //    throwCantPack("compressing certificate info is not supported");

    if (IDSIZE(PEDIR_COMRT))
        throwCantPack(".NET files (win32/net) are not yet supported");

    if (isdll)
        opt->win32_pe.strip_relocs = false;
    else if (opt->win32_pe.strip_relocs < 0)
        opt->win32_pe.strip_relocs = (ih.imagebase >= 0x10000);
    if (opt->win32_pe.strip_relocs)
    {
        if (ih.imagebase < 0x10000)
            throwCantPack("--strip-relocs is not allowed when imagebase < 0x10000");
        else
            ih.flags |= RELOCS_STRIPPED;
    }

    if (memcmp(isection[0].name,"UPX",3) == 0)
        throwAlreadyPackedByUPX();
    if (!opt->force && IDSIZE(15))
        throwCantPack("file is possibly packed/protected (try --force)");
    if (ih.entry && ih.entry < rvamin)
        throwCantPack("run a virus scanner on this file!");
    if (!opt->force && ih.subsystem == 1)
        throwCantPack("subsystem 'native' is not supported (try --force)");
    if (ih.filealign < 0x200)
        throwCantPack("filealign < 0x200 is not yet supported");

    handleStub(fi,fo,pe_offset);
    const unsigned usize = ih.imagesize;
    const unsigned xtrasize = UPX_MAX(ih.datasize, 65536u) + IDSIZE(PEDIR_IMPORT) + IDSIZE(PEDIR_BOUNDIM) + IDSIZE(PEDIR_IAT) + IDSIZE(PEDIR_DELAYIMP) + IDSIZE(PEDIR_RELOC);
    ibuf.alloc(usize + xtrasize);

    // BOUND IMPORT support. FIXME: is this ok?
    fi->seek(0,SEEK_SET);
    fi->readx(ibuf,isection[0].rawdataptr);

    Interval holes(ibuf);

    unsigned ic,jc,overlaystart = 0;
    ibuf.clear(0, usize);
    for (ic = jc = 0; ic < objs; ic++)
    {
        if (isection[ic].rawdataptr && overlaystart < isection[ic].rawdataptr + isection[ic].size)
            overlaystart = ALIGN_UP(isection[ic].rawdataptr + isection[ic].size,ih.filealign);
        if (isection[ic].vsize == 0)
            isection[ic].vsize = isection[ic].size;
        if ((isection[ic].flags & PEFL_BSS) || isection[ic].rawdataptr == 0
            || (isection[ic].flags & PEFL_INFO))
        {
            holes.add(isection[ic].vaddr,isection[ic].vsize);
            continue;
        }
        if (isection[ic].vaddr + isection[ic].size > usize)
            throwCantPack("section size problem");
        if (((isection[ic].flags & (PEFL_WRITE|PEFL_SHARED))
            == (PEFL_WRITE|PEFL_SHARED)))
            if (!opt->force)
                throwCantPack("writable shared sections not supported (try --force)");
        if (jc && isection[ic].rawdataptr - jc > ih.filealign)
            throwCantPack("superfluous data between sections");
        fi->seek(isection[ic].rawdataptr,SEEK_SET);
        jc = isection[ic].size;
        if (jc > isection[ic].vsize)
            jc = isection[ic].vsize;
        if (isection[ic].vsize == 0) // hack for some tricky programs - may this break other progs?
            jc = isection[ic].vsize = isection[ic].size;
        if (isection[ic].vaddr + jc > ibuf.getSize())
            throwInternalError("buffer too small 1");
        fi->readx(ibuf + isection[ic].vaddr,jc);
        jc += isection[ic].rawdataptr;
    }

    // check for NeoLite
    if (find(ibuf + ih.entry, 64+7, "NeoLite", 7) >= 0)
        throwCantPack("file is already compressed with another packer");

    unsigned overlay = file_size - stripDebug(overlaystart);
    if (overlay >= (unsigned) file_size)
    {
#if 0
        if (overlay < file_size + ih.filealign)
            overlay = 0;
        else if (!opt->force)
            throwNotCompressible("overlay problem (try --force)");
#endif
        overlay = 0;
    }
    checkOverlay(overlay);

    Resource res;
    Interval tlsiv(ibuf);
    Export xport((char*)(unsigned char*)ibuf);

    const unsigned dllstrings = processImports();
    processTls(&tlsiv); // call before processRelocs!!
    processResources(&res);
    processExports(&xport);
    processRelocs();

    //OutputFile::dump("x1", ibuf, usize);

    // some checks for broken linkers - disable filter if necessary
    bool allow_filter = true;
    if (ih.codebase == ih.database
        || ih.codebase + ih.codesize > ih.imagesize
        || (isection[virta2objnum(ih.codebase,isection,objs)].flags & PEFL_CODE) == 0)
        allow_filter = false;

    const unsigned oam1 = ih.objectalign - 1;

    // FIXME: disabled: the uncompressor would not allocate enough memory
    //objs = tryremove(IDADDR(PEDIR_RELOC),objs);

    // FIXME: if the last object has a bss then this won't work
    // newvsize = (isection[objs-1].vaddr + isection[objs-1].size + oam1) &~ oam1;
    // temporary solution:
    unsigned newvsize = (isection[objs-1].vaddr + isection[objs-1].vsize + oam1) &~ oam1;

    //fprintf(stderr,"newvsize=%x objs=%d\n",newvsize,objs);
    if (newvsize + soimport + sorelocs > ibuf.getSize())
         throwInternalError("buffer too small 2");
    memcpy(ibuf+newvsize,oimport,soimport);
    memcpy(ibuf+newvsize+soimport,orelocs,sorelocs);

    cimports = newvsize - rvamin;   // rva of preprocessed imports
    crelocs = cimports + soimport;  // rva of preprocessed fixups

    ph.u_len = newvsize + soimport + sorelocs;

    // some extra data for uncompression support
    unsigned s = 0;
    upx_byte * const p1 = ibuf + ph.u_len;
    memcpy(p1 + s,&ih,sizeof (ih));
    s += sizeof (ih);
    memcpy(p1 + s,isection,ih.objects * sizeof(*isection));
    s += ih.objects * sizeof(*isection);
    if (soimport)
    {
        set_le32(p1 + s,cimports);
        set_le32(p1 + s + 4,dllstrings);
        s += 8;
    }
    if (sorelocs)
    {
        set_le32(p1 + s,crelocs);
        p1[s + 4] = (unsigned char) (big_relocs & 6);
        s += 5;
    }
    if (soresources)
    {
        set_le16(p1 + s,icondir_count);
        s += 2;
    }
    // end of extra data
    set_le32(p1 + s,ptr_diff(p1,ibuf) - rvamin);
    s += 4;
    ph.u_len += s;
    obuf.allocForCompression(ph.u_len);

    // prepare packheader
    ph.u_len -= rvamin;
    // prepare filter
    Filter ft(ph.level);
    ft.buf_len = ih.codesize;
    ft.addvalue = ih.codebase - rvamin;
    // compress
    int filter_strategy = allow_filter ? 0 : -3;

    // disable filters for files with broken headers
    if (ih.codebase + ih.codesize > ph.u_len)
    {
        ft.buf_len = 1;
        filter_strategy = -3;
    }

    // limit stack size needed for runtime decompression
    upx_compress_config_t cconf; cconf.reset();
    cconf.conf_lzma.max_num_probs = 1846 + (768 << 4); // ushort: ~28 KiB stack
    compressWithFilters(&ft, 2048, &cconf, filter_strategy,
                        ih.codebase, rvamin, 0, NULL, 0);
// info: see buildLoader()
    newvsize = (ph.u_len + rvamin + ph.overlap_overhead + oam1) &~ oam1;
    /*
    if (tlsindex && ((newvsize - ph.c_len - 1024 + oam1) &~ oam1) > tlsindex + 4)
    tlsindex = 0;
    */

    const unsigned lsize = getLoaderSize();

    int identsize = 0;
    const unsigned codesize = getLoaderSection("IDENTSTR",&identsize);
    assert(identsize > 0);
    getLoaderSection("UPX1HEAD",(int*)&ic);
    identsize += ic;

    pe_section_t osection[4];
    // section 0 : bss
    //         1 : [ident + header] + packed_data + unpacker + tls
    //         2 : not compressed data
    //         3 : resource data -- wince 5 needs a new section for this

    // identsplit - number of ident + (upx header) bytes to put into the PE header
    int identsplit = pe_offset + sizeof(osection) + sizeof(oh);
    if ((identsplit & 0x1ff) == 0)
        identsplit = 0;
    else if (((identsplit + identsize) ^ identsplit) < 0x200)
        identsplit = identsize;
    else
        identsplit = ALIGN_GAP(identsplit, 0x200);
    ic = identsize - identsplit;

    const unsigned c_len = ((ph.c_len + ic) & 15) == 0 ? ph.c_len : ph.c_len + 16 - ((ph.c_len + ic) & 15);
    obuf.clear(ph.c_len, c_len - ph.c_len);

    const unsigned s1size = ALIGN_UP(ic + c_len + codesize,4u) + sotls;
    const unsigned s1addr = (newvsize - (ic + c_len) + oam1) &~ oam1;

    const unsigned ncsection = (s1addr + s1size + oam1) &~ oam1;
    const unsigned upxsection = s1addr + ic + c_len;

    Reloc rel(1024); // new relocations are put here
    static const char* symbols_to_relocate[] = {
        "ONAM", "BIMP", "BREL", "FIBE", "FIBS", "ENTR", "DST0", "SRC0"
    };
    for (unsigned s2r = 0; s2r < TABLESIZE(symbols_to_relocate); s2r++)
    {
        unsigned off = linker->getSymbolOffset(symbols_to_relocate[s2r]);
        if (off != 0xdeaddead)
            rel.add(off + upxsection, 3);
    }

    // new PE header
    memcpy(&oh,&ih,sizeof(oh));
    oh.filealign = 0x200; // identsplit depends on this
    memset(osection,0,sizeof(osection));

    oh.entry = upxsection;
    oh.objects = 4;
    oh.chksum = 0;

    // fill the data directory
    ODADDR(PEDIR_DEBUG) = 0;
    ODSIZE(PEDIR_DEBUG) = 0;
    ODADDR(PEDIR_IAT) = 0;
    ODSIZE(PEDIR_IAT) = 0;
    ODADDR(PEDIR_BOUNDIM) = 0;
    ODSIZE(PEDIR_BOUNDIM) = 0;


    // tls is put into section 1
    ic = s1addr + s1size - sotls;
    super::processTls(&rel,&tlsiv,ic);
    ODADDR(PEDIR_TLS) = sotls ? ic : 0;
    ODSIZE(PEDIR_TLS) = sotls ? 0x18 : 0;
    ic += sotls;

    // these are put into section 2

    ic = ncsection;

    // wince wants relocation data at the beginning of a section
    processRelocs(&rel);
    ODADDR(PEDIR_RELOC) = soxrelocs ? ic : 0;
    ODSIZE(PEDIR_RELOC) = soxrelocs;
    ic += soxrelocs;

    processImports(ic, linker->getSymbolOffset("IATT") + upxsection);
    ODADDR(PEDIR_IMPORT) = ic;
    ODSIZE(PEDIR_IMPORT) = soimpdlls;
    ic += soimpdlls;

    processExports(&xport,ic);
    ODADDR(PEDIR_EXPORT) = soexport ? ic : 0;
    ODSIZE(PEDIR_EXPORT) = soexport;
    if (!isdll && opt->win32_pe.compress_exports)
    {
        ODADDR(PEDIR_EXPORT) = IDADDR(PEDIR_EXPORT);
        ODSIZE(PEDIR_EXPORT) = IDSIZE(PEDIR_EXPORT);
    }
    ic += soexport;

    ic = (ic + oam1) &~ oam1;
    const unsigned res_start = ic;
    if (soresources)
        processResources(&res,ic);
    ODADDR(PEDIR_RESOURCE) = soresources ? ic : 0;
    ODSIZE(PEDIR_RESOURCE) = soresources;
    ic += soresources;

    const unsigned onam = ncsection + soxrelocs + ih.imagebase;
    linker->defineSymbol("start_of_dll_names", onam);
    linker->defineSymbol("start_of_imports", ih.imagebase + rvamin + cimports);
    linker->defineSymbol("start_of_relocs", crelocs + rvamin + ih.imagebase);
    linker->defineSymbol("filter_buffer_end", ih.imagebase + ih.codebase + ih.codesize);
    linker->defineSymbol("filter_buffer_start", ih.imagebase + ih.codebase);
    linker->defineSymbol("original_entry", ih.entry + ih.imagebase);
    linker->defineSymbol("uncompressed_length", ph.u_len);
    linker->defineSymbol("start_of_uncompressed", ih.imagebase + rvamin);
    linker->defineSymbol("compressed_length", ph.c_len);
    linker->defineSymbol("start_of_compressed", ih.imagebase + s1addr + identsize - identsplit);
    defineDecompressorSymbols();
    relocateLoader();

    MemBuffer loader(lsize);
    memcpy(loader, getLoader(), lsize);
    patchPackHeader(loader, lsize);

    // this is computed here, because soxrelocs changes some lines above
    const unsigned ncsize = soxrelocs + soimpdlls + soexport;
    const unsigned fam1 = oh.filealign - 1;

    // fill the sections
    strcpy(osection[0].name,"UPX0");
    strcpy(osection[1].name,"UPX1");
    strcpy(osection[2].name, "UPX2");
    strcpy(osection[3].name, ".rsrc");

    osection[0].vaddr = rvamin;
    osection[1].vaddr = s1addr;
    osection[2].vaddr = ncsection;
    osection[3].vaddr = res_start;

    osection[0].size = 0;
    osection[1].size = (s1size + fam1) &~ fam1;
    osection[2].size = (ncsize + fam1) &~ fam1;
    osection[3].size = (soresources + fam1) &~ fam1;

    osection[0].vsize = osection[1].vaddr - osection[0].vaddr;
    //osection[1].vsize = (osection[1].size + oam1) &~ oam1;
    //osection[2].vsize = (osection[2].size + oam1) &~ oam1;
    osection[1].vsize = osection[1].size;
    osection[2].vsize = osection[2].size;
    osection[3].vsize = osection[3].size;

    osection[0].rawdataptr = 0;
    osection[1].rawdataptr = (pe_offset + sizeof(oh) + sizeof(osection) + fam1) &~ fam1;
    osection[2].rawdataptr = osection[1].rawdataptr + osection[1].size;
    osection[3].rawdataptr = osection[2].rawdataptr + osection[2].size;

    osection[0].flags = (unsigned) (PEFL_BSS|PEFL_EXEC|PEFL_WRITE|PEFL_READ);
    osection[1].flags = (unsigned) (PEFL_DATA|PEFL_EXEC|PEFL_WRITE|PEFL_READ);
    osection[2].flags = (unsigned) (PEFL_DATA|PEFL_READ);
    osection[3].flags = (unsigned) (PEFL_DATA|PEFL_READ);

    oh.imagesize = (osection[3].vaddr + osection[3].vsize + oam1) &~ oam1;
    oh.bsssize  = osection[0].vsize;
    oh.datasize = osection[2].vsize + osection[3].vsize;
    oh.database = osection[2].vaddr;
    oh.codesize = osection[1].vsize;
    oh.codebase = osection[1].vaddr;
    oh.headersize = osection[1].rawdataptr;
    if (rvamin < osection[0].rawdataptr)
        throwCantPack("object alignment too small");

    if (opt->win32_pe.strip_relocs && !isdll)
        oh.flags |= RELOCS_STRIPPED;

    //for (ic = 0; ic < oh.filealign; ic += 4)
    //    set_le32(ibuf + ic,get_le32("UPX "));
    ibuf.clear(0, oh.filealign);

    info("Image size change: %u -> %u KiB",
         ih.imagesize / 1024, oh.imagesize / 1024);

    infoHeader("[Writing compressed file]");

    if (soresources == 0)
    {
        oh.objects = 3;
        memset(&osection[3], 0, sizeof(osection[3]));
    }
    // write loader + compressed file
    fo->write(&oh,sizeof(oh));
    fo->write(osection,sizeof(osection));
    // some alignment
    if (identsplit == identsize)
    {
        unsigned n = osection[1].rawdataptr - fo->getBytesWritten() - identsize;
        assert(n <= oh.filealign);
        fo->write(ibuf, n);
    }
    fo->write(loader + codesize,identsize);
    infoWriting("loader", fo->getBytesWritten());
    fo->write(obuf,c_len);
    infoWriting("compressed data", c_len);
    fo->write(loader,codesize);
    if (opt->debug.dump_stub_loader)
        OutputFile::dump(opt->debug.dump_stub_loader, loader, codesize);
    if ((ic = fo->getBytesWritten() & 3) != 0)
        fo->write(ibuf,4 - ic);
    fo->write(otls,sotls);
    if ((ic = fo->getBytesWritten() & fam1) != 0)
        fo->write(ibuf,oh.filealign - ic);
    fo->write(oxrelocs,soxrelocs);
    fo->write(oimpdlls,soimpdlls);
    fo->write(oexport,soexport);

    if ((ic = fo->getBytesWritten() & fam1) != 0)
        fo->write(ibuf,oh.filealign - ic);

    fo->write(oresources,soresources);
    if ((ic = fo->getBytesWritten() & fam1) != 0)
        fo->write(ibuf,oh.filealign - ic);

#if 0
    printf("%-13s: program hdr  : %8ld bytes\n", getName(), (long) sizeof(oh));
    printf("%-13s: sections     : %8ld bytes\n", getName(), (long) sizeof(osection));
    printf("%-13s: ident        : %8ld bytes\n", getName(), (long) identsize);
    printf("%-13s: compressed   : %8ld bytes\n", getName(), (long) c_len);
    printf("%-13s: decompressor : %8ld bytes\n", getName(), (long) codesize);
    printf("%-13s: tls          : %8ld bytes\n", getName(), (long) sotls);
    printf("%-13s: resources    : %8ld bytes\n", getName(), (long) soresources);
    printf("%-13s: imports      : %8ld bytes\n", getName(), (long) soimpdlls);
    printf("%-13s: exports      : %8ld bytes\n", getName(), (long) soexport);
    printf("%-13s: relocs       : %8ld bytes\n", getName(), (long) soxrelocs);
#endif

    // verify
    verifyOverlappingDecompression();

    // copy the overlay
    copyOverlay(fo, overlay, &obuf);

    // finally check the compression ratio
    if (!checkFinalCompressionRatio(fo))
        throwNotCompressible();
}