Exemplo n.º 1
0
/*
 * Decoder read handler for ISO 5426 input; yields one character per call.
 *
 * Characters flagged as combining by yaz_read_iso5426_comb() are buffered
 * in the decoder state, and the first non-combining character that follows
 * them is returned immediately.  The buffered characters are then handed
 * out, one per call and in the order they were read, by subsequent calls.
 *
 * cd           conversion handle, used only for reporting errors
 * d            decoder whose ->data points at the struct decoder_data state
 * inp          current input position
 * inbytesleft  number of bytes available at inp
 * no_read      out: number of input bytes accounted for by the returned
 *              character (0 on error)
 *
 * Returns the character, or 0 when yaz_read_iso5426_comb() returned 0 or
 * when the input ends after one or more combining characters (in which
 * case YAZ_ICONV_EINVAL is set).
 */
static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                               unsigned char *inp,
                               size_t inbytesleft, size_t *no_read)
{
    struct decoder_data *state = (struct decoder_data *) d->data;
    unsigned long ch = 0;

    /* Pending combining characters from an earlier call: replay the next
       one together with the byte count recorded for it.  No additional
       skipping is done for the double-diacritic characters (U+0360,
       U+0361); per the original author's note the conversion handlers
       return 0 with no_read=1 for the unmatched second half, so that byte
       is skipped by the caller. */
    if (state->comb_offset < state->comb_size)
    {
        ch = state->comb_x[state->comb_offset];
        *no_read = state->comb_no_read[state->comb_offset];
        state->comb_offset++;
        return ch;
    }

    /* Nothing pending: start a fresh buffer and read ahead. */
    state->comb_offset = 0;
    state->comb_size = 0;
    while (state->comb_size < 8)
    {
        int is_comb = 0;

        /* Input ran out after at least one combining character. */
        if (state->comb_size && inbytesleft == 0)
        {
            yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
            *no_read = 0;
            ch = 0;
            break;
        }

        ch = yaz_read_iso5426_comb(cd, state, inp, inbytesleft, no_read,
                                   &is_comb);
        if (ch == 0 || !is_comb)
            break;

        /* Combining character: remember it and keep reading. */
        state->comb_x[state->comb_size] = ch;
        state->comb_no_read[state->comb_size] = *no_read;
        inbytesleft -= *no_read;
        inp += *no_read;
        state->comb_size++;
    }
    return ch;
}
Exemplo n.º 2
0
/*
 * Encoder write handler: emits the Advance Greek byte sequence for the
 * Unicode code point x.
 *
 * Greek letters are written as a letter byte in the range 0x81..0x99,
 * optionally preceded by the prefix bytes 0x9d (tonos), 0x9e (dialytika)
 * and/or 0x9f (upper case), giving sequences of one to three bytes.  Any
 * other code point up to 255 is written unchanged as a single byte.
 *
 * cd            conversion handle, used only for reporting errors
 * w             encoder handle (not used here)
 * x             code point to encode
 * outbuf        in/out: output position, advanced past the bytes written
 * outbytesleft  in/out: room left at *outbuf, reduced by the bytes written
 *
 * Returns 0 on success.  Returns (size_t) -1 with YAZ_ICONV_EILSEQ set when
 * x has no encoding, or with YAZ_ICONV_E2BIG set when the encoded sequence
 * does not fit; in both cases nothing is written and the output position
 * is left untouched.
 *
 * The sequence is built in a local buffer first so that the space check
 * uses the real encoded length.  Demanding three free bytes up front would
 * reject a one- or two-byte sequence that fits exactly in the remaining
 * output space.
 */
static size_t write_advancegreek(yaz_iconv_t cd, yaz_iconv_encoder_t w,
                                 unsigned long x,
                                 char **outbuf, size_t *outbytesleft)
{
    unsigned char staged[3];        /* longest sequence is three bytes */
    unsigned char *out = staged;
    unsigned char *dst;
    size_t k = 0;
    size_t i;

    switch (x)
    {
    case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
    case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
    case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
    case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
    case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
    case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
    case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
    case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
    case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
    case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
    case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
    case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
    case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
    case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
    case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
    case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
    case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
    case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
    case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
    case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
    case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
    case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
    case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
    case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
    case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
    case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
    case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
    case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
    case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
    case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
    case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
    case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
    case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
    case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
    case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
    case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
    case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
    case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
    case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
    case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
    case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
    case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
    case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
    case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
    case 0x03b1 : out[k++]=0x81; break;
    case 0x03b2 : out[k++]=0x82; break;
    case 0x03b3 : out[k++]=0x83; break;
    case 0x03b4 : out[k++]=0x84; break;
    case 0x03b5 : out[k++]=0x85; break;
    case 0x03b6 : out[k++]=0x86; break;
    case 0x03b7 : out[k++]=0x87; break;
    case 0x03b8 : out[k++]=0x88; break;
    case 0x03b9 : out[k++]=0x89; break;
    case 0x03ba : out[k++]=0x8a; break;
    case 0x03bb : out[k++]=0x8b; break;
    case 0x03bc : out[k++]=0x8c; break;
    case 0x03bd : out[k++]=0x8d; break;
    case 0x03be : out[k++]=0x8e; break;
    case 0x03bf : out[k++]=0x8f; break;
    case 0x03c0 : out[k++]=0x90; break;
    case 0x03c1 : out[k++]=0x91; break;
    case 0x03c2 : out[k++]=0x92; break;
    case 0x03c3 : out[k++]=0x93; break;
    case 0x03c4 : out[k++]=0x94; break;
    case 0x03c5 : out[k++]=0x95; break;
    case 0x03c6 : out[k++]=0x96; break;
    case 0x03c7 : out[k++]=0x97; break;
    case 0x03c8 : out[k++]=0x98; break;
    case 0x03c9 : out[k++]=0x99; break;
    default:
        if (x > 255)
        {
            yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
            return (size_t) -1;
        }
        out[k++] = (unsigned char ) x;
        break;
    }

    /* Only now is the exact length known; refuse without writing anything
       if it does not fit. */
    if (k > *outbytesleft)
    {
        yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);  /* not room for output */
        return (size_t)(-1);
    }

    dst = (unsigned char *) *outbuf;
    for (i = 0; i < k; i++)
        dst[i] = staged[i];

    *outbytesleft -= k;
    (*outbuf) += k;
    return 0;
}
Exemplo n.º 3
0
/*
 * Decoder read handler: reads one Advance Greek character and returns its
 * Unicode code point.
 *
 * A character is any run of the prefix bytes 0x9d (tonos), 0x9e (dialytika)
 * and 0x9f (upper case) followed by one more byte.  When that byte lies in
 * 0x81..0x99 it selects a Greek letter, which the prefixes seen then
 * modify; any other byte is returned unchanged.
 *
 * cd           conversion handle, used only for reporting errors
 * d            decoder handle (not used here)
 * inp          current input position
 * inbytesleft  number of bytes available at inp
 * no_read      out: number of bytes consumed, prefixes included
 *
 * Returns the code point.  If the input is empty or ends right after the
 * prefix bytes, YAZ_ICONV_EINVAL is set, *no_read is 0 and 0 is returned.
 */
static unsigned long read_advancegreek(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                       unsigned char *inp,
                                       size_t inbytesleft, size_t *no_read)
{
    int upper = 0;
    int tonos = 0;
    int dialytika = 0;
    unsigned char code;
    unsigned long x;

    /* Gather the prefix bytes; each one only raises a flag. */
    *no_read = 0;
    for (; inbytesleft > 0; inp++, inbytesleft--, (*no_read)++)
    {
        if (*inp == 0x9d)
            tonos = 1;
        else if (*inp == 0x9e)
            dialytika = 1;
        else if (*inp == 0x9f)
            upper = 1;
        else
            break;
    }
    if (inbytesleft == 0)
    {
        yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL); /* incomplete input */
        *no_read = 0;
        return 0;
    }

    code = *inp;
    (*no_read)++;

    /* Bytes outside the letter range are passed through as they are. */
    if (code < 0x81 || code > 0x99)
        return code;

    /* Final sigma maps to U+03C2 whatever prefixes were seen. */
    if (code == 0x92)
        return 0x03c2;

    /* Letter bytes run parallel to U+03B1.. (lower case) and U+0391..
       (upper case), so the unaccented letter is a fixed offset away. */
    x = (upper ? 0x0391UL : 0x03b1UL) + (unsigned long) (code - 0x81);

    /* Only the vowels have accented forms; the prefixes are ignored for
       every other letter. */
    switch (code)
    {
    case 0x81: /* alpha */
        if (tonos)
            x = upper ? 0x0386 : 0x03ac;
        break;
    case 0x85: /* epsilon */
        if (tonos)
            x = upper ? 0x0388 : 0x03ad;
        break;
    case 0x87: /* eta */
        if (tonos)
            x = upper ? 0x0389 : 0x03ae;
        break;
    case 0x89: /* iota: tonos, dialytika, or both (lower case only) */
        if (tonos)
            x = upper ? 0x038a : (dialytika ? 0x0390 : 0x03af);
        else if (dialytika)
            x = upper ? 0x03aa : 0x03ca;
        break;
    case 0x8f: /* omicron */
        if (tonos)
            x = upper ? 0x038c : 0x03cc;
        break;
    case 0x95: /* upsilon: tonos, dialytika, or both (lower case only) */
        if (tonos)
            x = upper ? 0x038e : (dialytika ? 0x03b0 : 0x03cd);
        else if (dialytika)
            x = upper ? 0x03ab : 0x03cb;
        break;
    case 0x99: /* omega */
        if (tonos)
            x = upper ? 0x038f : 0x03ce;
        break;
    default:
        break;
    }
    return x;
}
Exemplo n.º 4
0
/*
 * Reads one character from ISO 5426 input, first consuming any leading
 * ESC (27) sequences and recording the character set each one selects.
 *
 * cd           conversion handle, used only for reporting errors
 * data         decoder state; g0_mode / g1_mode are updated by escape
 *              sequences and select the conversion routine used
 * inp          current input position
 * inbytesleft  number of bytes available at inp
 * no_read      out: bytes consumed, escape sequences included; 0 on error
 * comb         out: cleared before the conversion routine is called and
 *              passed on to it.  It is NOT written on the paths that
 *              return early (no input left, space, escape errors), so the
 *              caller must initialise it.
 *
 * Returns the converted character.  Returns 0 when:
 *  - no bytes remain once the escape sequences have been consumed (no
 *    error is set; *no_read holds the length of the escape sequences),
 *  - the input ends inside an escape sequence (YAZ_ICONV_EINVAL), or
 *  - the active mode is not one of 'B', 's' or 'E' (YAZ_ICONV_EILSEQ).
 * The conversion routine's own result is returned as is, whatever it is.
 */
static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
                                         struct decoder_data *data,
                                         unsigned char *inp,
                                         size_t inbytesleft, size_t *no_read,
                                         int *comb)
{
    *no_read = 0;
    /* Consume every escape sequence that precedes the character.  Each is
       ESC, optional '$', optional G0/G1 designator, optional '!', then a
       final byte that becomes the new mode. */
    while (inbytesleft > 0 && *inp == 27)
    {
        /* The final byte goes to G0 unless a G1 designator is seen. */
        int *modep = &data->g0_mode;
        /* Remembered so this sequence's length can be added to *no_read. */
        size_t inbytesleft0 = inbytesleft;

        inbytesleft--;
        inp++;
        if (inbytesleft == 0)
            goto incomplete;
        if (*inp == '$') /* set with multiple bytes */
        {
            inbytesleft--;
            inp++;
        }
        if (inbytesleft == 0)
            goto incomplete;
        if (*inp == '(' || *inp == ',')  /* G0 */
        {
            inbytesleft--;
            inp++;
        }
        else if (*inp == ')' || *inp == '-') /* G1 */
        {
            inbytesleft--;
            inp++;
            modep = &data->g1_mode;
        }
        if (inbytesleft == 0)
            goto incomplete;
        if (*inp == '!') /* ANSEL is a special case */
        {
            inbytesleft--;
            inp++;
        }
        if (inbytesleft == 0)
            goto incomplete;
        /* The mode is only stored once the whole sequence is present; an
           incomplete sequence leaves the mode as it was. */
        *modep = *inp++; /* Final character */
        inbytesleft--;

        (*no_read) += inbytesleft0 - inbytesleft;
    }
    if (inbytesleft == 0)
        return 0; /* only escape sequences were available; not an error */
    else if (*inp == ' ')
    {
        /* Space is returned directly, whatever the active mode is. */
        *no_read += 1;
        return ' ';
    }
    else
    {
        unsigned long x;
        size_t no_read_sub = 0; /* bytes consumed by the conversion routine */
        /* Bytes below 128 use the G0 mode, bytes from 128 up the G1 mode. */
        int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
        *comb = 0;

        /* NOTE(review): the meaning of the last two arguments (127 and
           0 or 128) is defined by the generated conversion routines, which
           are not visible here -- confirm against their definition. */
        switch(mode)
        {
        case 'B':  /* Basic ASCII */
        case 's':  /* ASCII */
            x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case 'E':  /* ANSEL */
            x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
            break;

        /* The character sets below are compiled out; selecting one of them
           ends up in the default branch and is reported as an error. */
#if 0
        case 'g':  /* Greek */
            x = yaz_iso5426_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case 'b':  /* Subscripts */
            x = yaz_iso5426_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case 'p':  /* Superscripts */
            x = yaz_iso5426_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case '2':  /* Basic Hebrew */
            x = yaz_iso5426_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case 'N':  /* Basic Cyrillic */
            x = yaz_iso5426_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case 'Q':  /* Extended Cyrillic */
            x = yaz_iso5426_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case '3':  /* Basic Arabic */
            x = yaz_iso5426_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case '4':  /* Extended Arabic */
            x = yaz_iso5426_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case 'S':  /* Greek */
            x = yaz_iso5426_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
        case '1':  /* Chinese, Japanese, Korean (EACC) */
            x = yaz_iso5426_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
            break;
#endif
        default:
            /* Unsupported character set: report it and consume nothing,
               discarding the escape-sequence byte count as well. */
            *no_read = 0;
            yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
            return 0;
        }
        /* Total = escape sequences + the character itself. */
        *no_read += no_read_sub;
        return x;
    }
    /* Reached only by goto: the input ended inside an escape sequence. */
incomplete:
    *no_read = 0;
    yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
    return 0;
}