Exemplo n.º 1
0
/* Set *gp to the Unicode mapping of the glyph name s.
 *
 * Resolution order:
 *   1. everything from the first '.' on is dropped;
 *   2. a name made of components separated by '_' is resolved component by
 *      component (recursively) and the results are concatenated;
 *   3. the name is looked up in glyph_unicode_tree;
 *   4. "uniXXXX..." names are decoded (one or several 4-hex-digit values);
 *   5. "uXXXX" names are decoded (single value).
 *
 * When nothing matches, *gp is left untouched; the callers in this file
 * preset gp->code to UNI_UNDEF before calling.  A name whose part before
 * the first '.' does not fit into the local scratch buffer is also treated
 * as unmapped.
 *
 * In case it returns gp->code == UNI_EXTRA_STRING the caller is responsible
 * for freeing gp->unicode_seq too. */
static void set_glyph_unicode(char *s, glyph_unicode_entry * gp)
{
    char buf[SMALL_BUF_SIZE], buf2[SMALL_BUF_SIZE], *p;
    const char *piece;          /* text contributed by one '_' component */
    long code;
    boolean last_component;
    glyph_unicode_entry tmp, *ptmp;

    /* skip dummy entries */
    if (s == NULL || s == notdef)
        return;

    /* strip everything after the first dot */
    p = strchr(s, '.');
    if (p != NULL) {
        size_t stem_len = (size_t) (p - s);

        /* the stem is copied into buf, so it has to fit (including the
           terminating NUL); an over-long stem is treated as unmapped
           instead of overflowing buf */
        if (stem_len >= sizeof(buf))
            return;
        *buf = 0;
        strncat(buf, s, stem_len);
        s = buf;
    }

    if (strlen(s) == 0)
        return;

    /* check for case of multiple components separated by '_' */
    p = strchr(s, '_');
    if (p != NULL) {
        assert(strlen(s) < sizeof(buf));
        if (s != buf) {
            /* work on a private copy: the loop below overwrites each '_'
               with NUL to isolate the components */
            strcpy(buf, s);
            p = strchr(buf, '_');
            s = buf;
        }
        *buf2 = 0;
        last_component = false;
        for (;;) {
            *p = 0;             /* terminate the current component */
            tmp.code = UNI_UNDEF;
            set_glyph_unicode(s, &tmp);
            switch (tmp.code) {
            case UNI_UNDEF:    /* not found, contributes nothing */
                piece = NULL;
                break;
            case UNI_STRING:   /* s matched an entry with string value in the database */
                assert(tmp.unicode_seq != NULL);
                piece = tmp.unicode_seq;
                break;
            case UNI_EXTRA_STRING:     /* s is a multiple value of form "uniXXXX" */
                piece = tmp.unicode_seq;
                break;
            default:           /* s matched an entry with numeric value in the
                                   database, or a value derived from "uXXXX" */
                assert(tmp.code >= 0);
                piece = utf16be_str(tmp.code);
            }
            /* every kind of component goes through the same bound check
               before it is appended to buf2 */
            if (piece != NULL) {
                assert(strlen(buf2) + strlen(piece) < sizeof(buf2));
                strcat(buf2, piece);
            }
            /* the recursive call handed us ownership of this string */
            if (tmp.code == UNI_EXTRA_STRING)
                xfree(tmp.unicode_seq);
            if (last_component)
                break;
            s = p + 1;
            p = strchr(s, '_');
            if (p == NULL) {
                p = strend(s);
                last_component = true;
            }
        }
        gp->code = UNI_EXTRA_STRING;
        gp->unicode_seq = xstrdup(buf2);
        return;
    }

    /* lookup for glyph name in the database */
    tmp.name = s;
    tmp.code = UNI_UNDEF;
    ptmp = (glyph_unicode_entry *) avl_find(glyph_unicode_tree, &tmp);
    if (ptmp != NULL) {
        /* unicode_seq is shared with the database entry, not copied */
        gp->code = ptmp->code;
        gp->unicode_seq = ptmp->unicode_seq;
        return;
    }

    /* check for case of "uniXXXX" (multiple 4-hex-digit values allowed) */
    if (str_prefix(s, "uni")) {
        p = s + strlen("uni");
        code = check_unicode_value(p, true);
        if (code != UNI_UNDEF) {
            if (strlen(p) == 4) /* single value */
                gp->code = code;
            else {              /* multiple value */
                gp->code = UNI_EXTRA_STRING;
                gp->unicode_seq = xstrdup(p);
            }
        }
        return;                 /* since the last case cannot happen */
    }

    /* check for case of "uXXXX" (single value up to 6 hex digits) */
    if (str_prefix(s, "u")) {
        p = s + strlen("u");
        code = check_unicode_value(p, false);
        if (code != UNI_UNDEF) {
            assert(code >= 0);
            gp->code = code;
        }
    }
}
Exemplo n.º 2
0
/* Write a ToUnicode CMap stream object for an 8-bit encoding.
 *
 * glyph_names: the glyph names of the encoding; entries 0..255 are read.
 * name:        encoding name; a trailing ".enc" is stripped, otherwise
 *              "-builtin" is appended to form the CMap name.
 *
 * Returns the number of the new object, or 0 (after a warning, and with
 * fixedgentounicode reset to 0) when the glyph-to-unicode database is
 * still empty.
 *
 * Codes that map to consecutive numeric values are written as bfrange
 * entries; all other mapped codes are written as bfchar entries.  Both
 * kinds are emitted in groups of at most 100 entries. */
integer write_tounicode(char **glyph_names, char *name)
{
    char buf[SMALL_BUF_SIZE], *p;
    static char builtin_suffix[] = "-builtin";
    short range_size[257];
    glyph_unicode_entry gtab[257];
    integer objnum;
    int i, j;
    int bfchar_count, bfrange_count, subrange_count;
    assert(strlen(name) + strlen(builtin_suffix) < SMALL_BUF_SIZE);
    if (glyph_unicode_tree == NULL) {
        pdftex_warn("no GlyphToUnicode entry has been inserted yet!");
        fixedgentounicode = 0;
        return 0;
    }
    strcpy(buf, name);
    if ((p = strrchr(buf, '.')) != NULL && strcmp(p, ".enc") == 0)
        *p = 0;                 /* strip ".enc" from encoding name */
    else
        strcat(buf, builtin_suffix);    /* ".enc" not present, this is a builtin
                                           encoding so the name is eg "cmr10-builtin" */
    objnum = pdfnewobjnum();
    pdfbegindict(objnum, 0);
    pdfbeginstream();
    pdf_printf("%%!PS-Adobe-3.0 Resource-CMap\n"
               "%%%%DocumentNeededResources: ProcSet (CIDInit)\n"
               "%%%%IncludeResource: ProcSet (CIDInit)\n"
               "%%%%BeginResource: CMap (TeX-%s-0)\n"
               "%%%%Title: (TeX-%s-0 TeX %s 0)\n"
               "%%%%Version: 1.000\n"
               "%%%%EndComments\n"
               "/CIDInit /ProcSet findresource begin\n"
               "12 dict begin\n"
               "begincmap\n"
               "/CIDSystemInfo\n"
               "<< /Registry (TeX)\n"
               "/Ordering (%s)\n"
               "/Supplement 0\n"
               ">> def\n"
               "/CMapName /TeX-%s-0 def\n"
               "/CMapType 2 def\n"
               "1 begincodespacerange\n"
               "<00> <FF>\n" "endcodespacerange\n", buf, buf, buf, buf, buf);

    /* set gtab; entry 256 is a sentinel that terminates any range */
    for (i = 0; i < 256; ++i) {
        gtab[i].code = UNI_UNDEF;
        set_glyph_unicode(glyph_names[i], &gtab[i]);
    }
    gtab[256].code = UNI_UNDEF;

    /* set range_size: only the first slot of each range is written; the
       loops below always jump over the remaining slots of a range */
    for (i = 0; i < 256;) {
        if (gtab[i].code == UNI_STRING || gtab[i].code == UNI_EXTRA_STRING) {
            range_size[i] = 1;  /* single entry */
            i++;
        } else if (gtab[i].code == UNI_UNDEF) {
            range_size[i] = 0;  /* no entry */
            i++;
        } else {                /* gtab[i].code >= 0 */
            j = i;
            /* Extend the range while the next code is the numeric
               successor.  A bfrange increments only the last byte of its
               destination string, and that byte must not overflow, so the
               range also ends at a code whose low byte is 0xFF. */
            while (i < 256 && gtab[i + 1].code >= 0 &&
                   gtab[i].code + 1 == gtab[i + 1].code &&
                   (gtab[i].code & 0xFF) != 0xFF)
                i++;
            /* at this point i is the last entry of the subrange */
            i++;                /* move i to the next entry */
            range_size[j] = i - j;
        }
    }

    /* calculate bfrange_count and bfchar_count */
    bfrange_count = 0;
    bfchar_count = 0;
    for (i = 0; i < 256;) {
        if (range_size[i] == 1) {
            bfchar_count++;
            i++;
        } else if (range_size[i] > 1) {
            bfrange_count++;
            i += range_size[i];
        } else
            i++;
    }

    /* write out bfrange, at most 100 entries per beginbfrange/endbfrange
       group; one (possibly empty) group is always written */
    i = 0;
    do {
        subrange_count = bfrange_count > 100 ? 100 : bfrange_count;
        bfrange_count -= subrange_count;
        pdf_printf("%i beginbfrange\n", subrange_count);
        for (j = 0; j < subrange_count; j++) {
            /* test the index first: range_size[256] is never set */
            while (i < 256 && range_size[i] <= 1)
                i++;
            assert(i < 256);
            pdf_printf("<%02X> <%02X> <%s>\n", i, i + range_size[i] - 1,
                       utf16be_str(gtab[i].code));
            i += range_size[i];
        }
        pdf_printf("endbfrange\n");
    } while (bfrange_count > 0);

    /* write out bfchar, grouped the same way */
    i = 0;
    do {
        subrange_count = bfchar_count > 100 ? 100 : bfchar_count;
        bfchar_count -= subrange_count;
        pdf_printf("%i beginbfchar\n", subrange_count);
        for (j = 0; j < subrange_count; j++) {
            /* advance to the next single entry, jumping over whole ranges */
            while (i < 256) {
                if (range_size[i] > 1)
                    i += range_size[i];
                else if (range_size[i] == 0)
                    i++;
                else            /* range_size[i] == 1 */
                    break;
            }
            assert(i < 256 && gtab[i].code != UNI_UNDEF);
            if (gtab[i].code == UNI_STRING
                || gtab[i].code == UNI_EXTRA_STRING) {
                assert(gtab[i].unicode_seq != NULL);
                pdf_printf("<%02X> <%s>\n", i, gtab[i].unicode_seq);
            } else
                pdf_printf("<%02X> <%s>\n", i, utf16be_str(gtab[i].code));
            i++;
        }
        pdf_printf("endbfchar\n");
    } while (bfchar_count > 0);

    /* free strings allocated by set_glyph_unicode() */
    for (i = 0; i < 256; ++i) {
        if (gtab[i].code == UNI_EXTRA_STRING)
            xfree(gtab[i].unicode_seq);
    }

    pdf_printf("endcmap\n"
               "CMapName currentdict /CMap defineresource pop\n"
               "end\n" "end\n" "%%%%EndResource\n" "%%%%EOF\n");
    pdfendstream();
    return objnum;
}
Exemplo n.º 3
0
/* Write /ToUnicode data to *gp based on glyph name s, taking into account
 * tfmname.
 *
 * Resolution order:
 *   1. everything from the first '.' on is dropped;
 *   2. a name made of components separated by '_' is resolved component by
 *      component (recursively) and the results are concatenated;
 *   3. the name is looked up as "tfm:<tfmname>/<name>", then as plain
 *      "<name>", in glyph_unicode_tree;
 *   4. "uniXXXX..." names are decoded (one or several 4-hex-digit values);
 *   5. "uXXXX" names are decoded (single value).
 *
 * When nothing matches, *gp is left untouched.  A name whose part before
 * the first '.' does not fit into the local scratch buffer is treated as
 * unmapped, and a lookup key that does not fit is not looked up.
 *
 * In case it returns gp->code == UNI_EXTRA_STRING the caller is responsible
 * for freeing gp->unicode_seq too. */
static void set_glyph_unicode(const char *s, const char* tfmname, 
                              glyph_unicode_entry *gp)
{
    char buf[SMALL_BUF_SIZE], buf2[SMALL_BUF_SIZE], *p;
    const char *p2; /* p2 points in s; p above points in writable copies */
    const char *piece;          /* text contributed by one '_' component */
    long code;
    int key_len;                /* result of snprintf() when building a key */
    boolean last_component;
    glyph_unicode_entry tmp, *ptmp;

    /* skip dummy entries */
    if (s == NULL || s == notdef)
        return;

    /* strip everything after the first dot */
#if defined(MIKTEX)
    p = strchr(const_cast<char*>(s), '.');
#else
    p = strchr(s, '.');
#endif
    if (p != NULL) {
        size_t stem_len = (size_t) (p - s);

        /* the stem is copied into buf, so it has to fit (including the
           terminating NUL); an over-long stem is treated as unmapped
           instead of overflowing buf */
        if (stem_len >= sizeof(buf))
            return;
        *buf = 0;
        strncat(buf, s, stem_len);
        s = buf;
    }

    if (strlen(s) == 0)
        return;

    /* check for case of multiple components separated by '_' */
#if defined(MIKTEX)
    p = strchr(const_cast<char*>(s), '_');
#else
    p = strchr(s, '_');
#endif
    if (p != NULL) {
        assert(strlen(s) < sizeof(buf));
        if (s != buf) {
            /* work on a private copy: the loop below overwrites each '_'
               with NUL to isolate the components */
            strcpy(buf, s);
            p = strchr(buf, '_');
            s = buf;
        }
        *buf2 = 0;
        last_component = false;
        for (;;) {
            *p = 0;             /* terminate the current component */
            tmp.code = UNI_UNDEF;
            set_glyph_unicode(s, tfmname, &tmp);
            switch (tmp.code) {
            case UNI_UNDEF:    /* not found, contributes nothing */
                piece = NULL;
                break;
            case UNI_STRING:   /* s matched an entry with string value in the database */
                assert(tmp.unicode_seq != NULL);
                piece = tmp.unicode_seq;
                break;
            case UNI_EXTRA_STRING:     /* s is a multiple value of form "uniXXXX" */
                piece = tmp.unicode_seq;
                break;
            default:           /* s matched an entry with numeric value in the
                                   database, or a value derived from "uXXXX" */
                assert(tmp.code >= 0);
                piece = utf16be_str(tmp.code);
            }
            /* every kind of component goes through the same bound check
               before it is appended to buf2 */
            if (piece != NULL) {
                assert(strlen(buf2) + strlen(piece) < sizeof(buf2));
                strcat(buf2, piece);
            }
            /* the recursive call handed us ownership of this string */
            if (tmp.code == UNI_EXTRA_STRING)
                xfree(tmp.unicode_seq);
            if (last_component)
                break;
            s = p + 1;
#if defined(MIKTEX)
            p = strchr(const_cast<char*>(s), '_');
#else
            p = strchr(s, '_');
#endif
            if (p == NULL) {
#if defined(MIKTEX)
                p = strend(const_cast<char*>(s));
#else
                p = strend(s);
#endif
                last_component = true;
            }
        }
        gp->code = UNI_EXTRA_STRING;
        gp->unicode_seq = xstrdup(buf2);
        return;
    }

    /* Glyph name search strategy: first look up the glyph name in the
       tfm's namespace, failing that look it up in the main database. */
    /* Note: buf may alias s in the code below, but s and buf2 are
       guaranteed to be distinct because the code changing buf2 above
       always returns before reaching the code below. */

    /* lookup for glyph name in the tfm's namespace; a key that had to be
       truncated is not looked up, since it could only match an entry for
       a different name */
    key_len = snprintf(buf2, sizeof(buf2), "tfm:%s/%s", tfmname, s);
    if (key_len >= 0 && (size_t) key_len < sizeof(buf2)) {
        tmp.name = buf2;
        tmp.code = UNI_UNDEF;
        ptmp = (glyph_unicode_entry *) avl_find(glyph_unicode_tree, &tmp);
        if (ptmp != NULL) {
            /* unicode_seq is shared with the database entry, not copied */
            gp->code = ptmp->code;
            gp->unicode_seq = ptmp->unicode_seq;
            return;
        }
    }

    /* lookup for glyph name in the main database, same truncation rule */
    key_len = snprintf(buf2, sizeof(buf2), "%s", s);
    if (key_len >= 0 && (size_t) key_len < sizeof(buf2)) {
        tmp.name = buf2;
        tmp.code = UNI_UNDEF;
        ptmp = (glyph_unicode_entry *) avl_find(glyph_unicode_tree, &tmp);
        if (ptmp != NULL) {
            gp->code = ptmp->code;
            gp->unicode_seq = ptmp->unicode_seq;
            return;
        }
    }

    /* check for case of "uniXXXX" (multiple 4-hex-digit values allowed) */
    if (str_prefix(s, "uni")) {
        p2 = s + strlen("uni");
        code = check_unicode_value(p2, true);
        if (code != UNI_UNDEF) {
            if (strlen(p2) == 4) /* single value */
                gp->code = code;
            else {              /* multiple value */
                gp->code = UNI_EXTRA_STRING;
                gp->unicode_seq = xstrdup(p2);
            }
        }
        return;                 /* since the last case cannot happen */
    }

    /* check for case of "uXXXX" (single value up to 6 hex digits) */
    if (str_prefix(s, "u")) {
        p2 = s + strlen("u");
        code = check_unicode_value(p2, false);
        if (code != UNI_UNDEF) {
            assert(code >= 0);
            gp->code = code;
        }
    }
}