示例#1
0
文件: utf7.c 项目: mloar/charset
static void read_utf7(charset_spec const *charset, long int input_chr,
                      charset_state *state,
                      void (*emit)(void *ctx, long int output), void *emitctx)
{
    long int hw;

    UNUSEDARG(charset);

    /*
     * state->s0 is used to handle the conversion of the UTF-7
     * transport format into a stream of halfwords. Its layout is:
     *
     *  - In normal ASCII mode, it is zero.
     *
     * 	- Otherwise, it holds a leading 1 followed by all the bits
     * 	  so far accumulated in base64 digits.
     *
     * 	- Special case: when we have only just seen the initial `+'
     * 	  which enters base64 mode, it is set to 2 rather than 1
     * 	  (this is an otherwise unused value since base64 always
     * 	  accumulates an even number of bits at a time), so that
     * 	  the special sequence `+-' can be made to encode `+'
     * 	  easily.
     *
     * state->s1 is used to handle the conversion of those
     * halfwords into Unicode values. It contains a high surrogate
     * value if we've just seen one, and 0 otherwise.
     */

    if (!state->s0) {
        if (input_chr == '+')
            state->s0 = 2;
        else
            emit(emitctx, input_chr);
        return;
    } else {
        if (!SET_B(input_chr)) {
            /*
             * base64 mode ends here. Emit the character we have,
             * unless it's a minus in which case we should swallow
             * it.
             */
            if (input_chr != '-')
                emit(emitctx, input_chr);
            else if (state->s0 == 2)
                emit(emitctx, '+');    /* special case */
            state->s0 = 0;
            return;
        }

        /*
         * Now we have a base64 character, so add it to our state,
         * first correcting the special case value of s0.
         */
        if (state->s0 == 2)
            state->s0 = 1;
        state->s0 = (state->s0 << 6) | base64_value(input_chr);
    }

    /*
     * If we don't have a whole halfword at this point, bale out.
     */
    if (!(state->s0 & 0xFFFF0000))
        return;

    /*
     * Otherwise, extract the halfword. There are three
     * possibilities for where the top set bit might be.
     */
    if (state->s0 & 0x00100000) {
        hw = (state->s0 >> 4) & 0xFFFF;
        state->s0 = (state->s0 & 0xF) | 0x10;
    } else if (state->s0 & 0x00040000) {
示例#2
0
bool process_char(char c) {
    bool isDigit = (('0'<=c) && (c<='9'));
    bool isWhite = ((' ' == c)||('\t'==c));
    uint8_t b = 0;

    // ignore '\r'
    if (('\r' == c))
        return TRUE;
    // at end-of-line, command is complete => call process_command
    if (('\n' == c)) {
#ifdef DEBUG
        uint8_t i;
        for(i=0;i<31;i++) {
            if (codes_seen & ((uint32_t)1 << i)) {
                if (numbers_got & ((uint32_t)1 << i)) {
                    LOG_STRING("P: TOKEN ");LOG_CHAR('A'+i);LOG_S32(numbers[i]);LOG_NEWLINE;
                } else {
                    LOG_STRING("P: TOKEN ");LOG_CHAR('A'+i);LOG_NEWLINE;
                }
            }
        }
#endif
        if (base64_len) {
            LOG_STRING("P: TOKEN $ (");LOG_U8(base64_len);LOG_STRING("Bytes)\n");
            // if Stepper queue is empty, transfer modulation data now, else later
            if (STEPPER_QUEUE_is_empty()) {
                for(b=0;b<base64_len;b++)
                    LASER_RASTERDATA_put(base64_bytes[b]);
                base64_len=b=0;
            }
        }
        if (state != ERROR_STATE) {
            LOG_STRING("P: PROCESS COMMAND\n");
            process_command();
            // if stepper queue was not empty before, transfer modulation now
            if(base64_len) {
                for(b=0;b<base64_len;b++)
                    LASER_RASTERDATA_put(base64_bytes[b]);
                base64_len=0;
            }
        } else
            LOG_STRING("P: ERROR-STATE: IGNORE COMMAND!\n");
        // re-init parser
        codes_seen = 0;
        numbers_got = 0;
        state = EXPECT_FIRST_LETTER;
        return state != ERROR_STATE;
    }
    // XXX update checksum

    // state dependent interpretation of characters
    switch(state) {
        case EXPECT_FIRST_LETTER:
            codes_seen = 0;
            numbers_got = 0;
            memset(numbers, 0, sizeof(numbers));
            memset(integers, 0, sizeof(integers));
            memset(base64_bytes, 0, sizeof(base64_bytes));
            memset(filename, 0, sizeof(filename));
            filename_len = 0;
            base64_len = 0;
            state = EXPECT_LETTER;
            // intentionally no break !
        case EXPECT_LETTER:
            if ((('A'<=c) && (c<='Z'))) {
                last_letter = c-'A';
                codes_seen |= ((uint32_t)1) << last_letter;
                state = EXPECT_NUMBER_OR_SIGN;
                return TRUE;
            } else if (isWhite) { // ignore whitespace
                return TRUE;
            } else if ('*' == c) {
                state = PARSE_CHECKSUM;
                return TRUE;
            } else if (';' == c) {
                state = IGNORE_REST;
                return TRUE;
            } else if ('(' == c) {
                state = COMMENT_MODE;
                return TRUE;
            } else if ('$' == c) {
                state = EXPECT_BASE64_1;
                return TRUE;
            } else if ('\'' == c) {
                state = PARSE_FILENAME_TICKS;
                return TRUE;
            } else if ('"' == c) {
                state = PARSE_FILENAME_DOUBLETICKS;
                return TRUE;
            } else if (!filename_len) {
                state = PARSE_FILENAME;
                filename[filename_len++] = c;
                filename[filename_len] = 0;
                return TRUE;
            }
            LOG_PARSE_ERROR("unexpected character and filename already set!");
            break;
        case EXPECT_NUMBER_OR_SIGN:
            if (isWhite) {
                state = EXPECT_LETTER;
                return TRUE;
            } else if (!(isDigit || (c == '+') || (c=='-'))) {
                if (filename_len) {
                    LOG_PARSE_ERROR("filename already set: unexpected character found");
                }
                state = PARSE_FILENAME;
                filename[filename_len++] = last_letter + 'A';
                filename[filename_len++] = c;
                filename[filename_len] = 0;
                return TRUE;
            }
            state = EXPECT_FIRST_DIGIT;
            current_int = 0;
            digits = 0;
            subdigits = 0;
            if (c == '-') {
                isNegative = TRUE;
                return TRUE;
            }
            isNegative = FALSE;
            if (c == '+')    // needless, but valid
                return TRUE;
            // intentionally no break!
        case EXPECT_FIRST_DIGIT:    // first digit of a number
            if (isWhite) {
                state = EXPECT_LETTER;
                return TRUE;
            } else if (!isDigit)
                LOG_PARSE_ERROR("Expected [0-9\\w]");
            current_int = (uint8_t) c - '0';
            digits++;
            state = EXPECT_ANOTHERDIGIT;
            // fall through to number storage
            break;
        case EXPECT_ANOTHERDIGIT: // digits of a number before '.' or 'eE'
            if (isDigit) {
                if (digits>9)
                    LOG_PARSE_ERROR("Too many leading digits!");
                times_ten(current_int);
                current_int += (uint8_t) (c - '0');
                digits++;
                // fall through to number storage
                break;
            } else if ('.' == c) {
                state = EXPECT_SUBDIGITS;
                break;
            } else if (isWhite) {
                state = EXPECT_LETTER;
                break;
            } else if ('*' == c) {
                state = PARSE_CHECKSUM;
                break;
            } else if (';' == c) {
                state = IGNORE_REST;
                break;
            } else if ('(' == c) {
                state = COMMENT_MODE;
                break;
            } else if ('$' == c) {
                state = EXPECT_BASE64_1;
                break;
            } else if ('\'' == c) {
                state = PARSE_FILENAME_TICKS;
                break;
            } else if ('"' == c) {
                state = PARSE_FILENAME_DOUBLETICKS;
                break;
            } else
                LOG_PARSE_ERROR("Expected [0-9.\\w]");

        case EXPECT_SUBDIGITS:    // digits of a number after '.'
            if (isDigit) {
                if (subdigits >= SCALE_DIGITS) // ignore further digits
                    return TRUE;
                if (digits+subdigits > 9) //capacity exceeded!
                    //~ LOG_PARSE_ERROR("Too many total digits!");
                    return TRUE; // ignore further digits
                times_ten(current_int);
                current_int += (uint8_t) (c - '0');
                subdigits++;
                // fall through to number storage
                break;
            } else if (isWhite) {
                state = EXPECT_LETTER;
                return TRUE;
            } else
                LOG_PARSE_ERROR("Expected [0-9\\w]");
        case EXPECT_BASE64_1:    // expect first char of a base64-string
            if (isWhite) {
                state = EXPECT_LETTER;
                return TRUE;
            }
            b = base64_value(c);
            if (b > 63) {
                state = IGNORE_REST;
                LOG_PARSE_ERROR("Expected a BASE64 character");
            }
            base64_bytes[base64_len] = b<<2;
            state = EXPECT_BASE64_2;
            return TRUE;
        case EXPECT_BASE64_2:    // expect second char of a base64-string
            b = base64_value(c);
            if (b > 63) {
                state = IGNORE_REST;
                LOG_PARSE_ERROR("Expected a BASE64 character");
            }
            base64_bytes[base64_len++] |= (b >> 4);
            if (base64_len >= sizeof(base64_bytes)) {
                state = IGNORE_REST;
                LOG_PARSE_ERROR("Too many Base64 Bytes (Buffer full)");
            }
            base64_bytes[base64_len] = (b << 4);
            state = EXPECT_BASE64_3;
            return TRUE;
        case EXPECT_BASE64_3:    // expect third char of a base64-string (may be '=')
            if ('=' != c) {
                b = base64_value(c);
                if (b > 63) {
                    state = IGNORE_REST;
                    LOG_PARSE_ERROR("Expected a BASE64 character");
                }
                base64_bytes[base64_len++] |= (b >> 2);
                if (base64_len >= sizeof(base64_bytes)) {
                    state = IGNORE_REST;
                    LOG_PARSE_ERROR("Too many Base64 Bytes (Buffer full)");
                }
                base64_bytes[base64_len] = (b << 6);
            }
            state = EXPECT_BASE64_4;
            return TRUE;
        case EXPECT_BASE64_4:    // expect fourth char of a base64-string (may be '=')
            if ('=' != c) {
                b = base64_value(c);
                if (b > 63) {
                    state = IGNORE_REST;
                    LOG_PARSE_ERROR("Expected a BASE64 character");
                }
                base64_bytes[base64_len++] |= b;
                if (base64_len >= sizeof(base64_bytes)) {
                    state = IGNORE_REST;
                    LOG_PARSE_ERROR("Too many Base64 Bytes (Buffer full)");
                }
            }
            state = EXPECT_BASE64_1;
            return TRUE;
        case COMMENT_MODE:    // inside comment mode ()
            // just eat everything between '(' and ')'
            if (c == ')') {
                state = EXPECT_LETTER;
            }
            return TRUE;
        case ERROR_STATE: // after an error, ignore until end of line and do not process_command at eol!
            return FALSE;
        case PARSE_CHECKSUM: // after a '*': parse digits of the checksum (until end of line)
            // ignored.
        case IGNORE_REST:    // after a ; (comment until end of line)
            // just eat everything after a ';'
            return TRUE;
        case PARSE_FILENAME_DOUBLETICKS: // parse filename inside double ticks ""
            if ('"' == c) {
                state = EXPECT_LETTER;
                return TRUE;
            }
            b = filename_len;
            filename[b++] = c;
            filename[b] = 0;
            filename_len = b;
            return (filename_len < sizeof(filename));
        case PARSE_FILENAME_TICKS: // parse filename inside single ticks ''
            if ('\'' == c) {
                state = EXPECT_LETTER;
                return TRUE;
            }
            b = filename_len;
            filename[b++] = c;
            filename[b] = 0;
            filename_len = b;
            return (filename_len < sizeof(filename));
        case PARSE_FILENAME: // Characters which must be a filename
            if (isWhite) {
                state = EXPECT_LETTER;
                return TRUE;
            }
            b = filename_len;
            filename[b++] = c;
            filename[b] = 0;
            filename_len = b;
            return (filename_len < sizeof(filename));

        default:
            LOG_PARSE_ERROR("Unknown or undefined State");
    }