/* Recode a stream of IBM-PC bytes towards Icon-QNX.

   Bytes are pulled one at a time with get_byte and written with put_byte
   until EOF is seen.  Fifteen IBM-PC codes (128..151, listed below in
   ascending order) are handed to TRANSLATE_AND_BREAK together with a pair
   of letters; that macro is defined elsewhere and is presumably what emits
   the escape sequence and fetches the next input byte.  A DOS_CR directly
   followed by DOS_LF is replaced by a single ENDLINE, while a lone DOS_CR
   is copied.  ENDLINE and ESCAPE bytes already present in the input are
   reported as RECODE_AMBIGUOUS_OUTPUT and then copied like any other byte.
   A DOS_EOF byte is reported as RECODE_NOT_CANONICAL and ends the
   conversion exactly as a real EOF does.

   The function leaves only through SUBTASK_RETURN, or through
   RETURN_IF_NOGO when that macro decides to give up.  */
static bool
transform_ibmpc_iconqnx (RECODE_SUBTASK subtask)
{
  int input_char = get_byte (subtask);

  for (;;)
    {
      switch (input_char)
        {
        case DOS_EOF:
          RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
          /* Fall through.  */
        case EOF:
          SUBTASK_RETURN (subtask);

        /* IBM-PC codes having an Icon-QNX escape pair, by code value.  */
        case 128: TRANSLATE_AND_BREAK ('K', 'C');
        case 129: TRANSLATE_AND_BREAK ('H', 'u');
        case 130: TRANSLATE_AND_BREAK ('B', 'e');
        case 131: TRANSLATE_AND_BREAK ('C', 'a');
        case 133: TRANSLATE_AND_BREAK ('A', 'a');
        case 135: TRANSLATE_AND_BREAK ('K', 'c');
        case 136: TRANSLATE_AND_BREAK ('C', 'e');
        case 137: TRANSLATE_AND_BREAK ('H', 'e');
        case 138: TRANSLATE_AND_BREAK ('A', 'e');
        case 139: TRANSLATE_AND_BREAK ('H', 'i');
        case 140: TRANSLATE_AND_BREAK ('C', 'i');
        case 144: TRANSLATE_AND_BREAK ('B', 'E');
        case 147: TRANSLATE_AND_BREAK ('C', 'o');
        case 150: TRANSLATE_AND_BREAK ('C', 'u');
        case 151: TRANSLATE_AND_BREAK ('A', 'u');

        case DOS_CR:
          input_char = get_byte (subtask);
          if (input_char != DOS_LF)
            {
              /* Lone carriage return: copy it, and let the byte just
                 read be examined by the next turn of the loop.  */
              put_byte (DOS_CR, subtask);
              break;
            }
          /* CR LF pair: a single end of line on the output side.  */
          put_byte (ENDLINE, subtask);
          input_char = get_byte (subtask);
          break;

        case ENDLINE:
        case ESCAPE:
          RETURN_IF_NOGO (RECODE_AMBIGUOUS_OUTPUT, subtask);
          /* Fall through.  */
        default:
          put_byte (input_char, subtask);
          input_char = get_byte (subtask);
        }
    }
}
/* Strip the Mule leading byte PREFIX from the input of TASK.

   Each input byte falls in one of three classes:

   - IS_ASCII bytes are copied unchanged;
   - a byte whose low eight bits equal PREFIX introduces one character:
     the byte following the prefix (after skipping any repeated prefixes)
     is the one written out;
   - any other byte is reported as RECODE_UNTRANSLATABLE and not written.

   Repeated prefixes and an ASCII byte right after a prefix are reported
   as RECODE_NOT_CANONICAL; a prefix that is the last thing in the input
   is reported as RECODE_INVALID_INPUT and ends the loop.  RETURN_IF_NOGO
   (defined elsewhere) may leave the function at each of those points;
   otherwise the function finishes through TASK_RETURN.  */
static bool
transform_mule_latin (RECODE_CONST_STEP step, RECODE_TASK task,
                      unsigned prefix)
{
  int character;

  for (;;)
    {
      character = get_byte (task);
      if (character == EOF)
        break;

      if (IS_ASCII (character))
        {
          put_byte (character, task);
          continue;
        }

      if ((character & MASK (8)) != prefix)
        {
          RETURN_IF_NOGO (RECODE_UNTRANSLATABLE, step, task);
          continue;
        }

      /* This happens in practice, sometimes, that Emacs goes a bit
         berzerk and generates strings of prefix characters.  Remove all
         succeeding prefixes in a row.  This is irreversible.  */
      for (character = get_byte (task);
           (character & MASK (8)) == prefix;
           character = get_byte (task))
        {
          RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
        }

      if (character == EOF)
        {
          /* The prefix was not followed by anything.  */
          RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
          break;
        }

      /* An ASCII byte does not need a prefix, yet it is still copied.  */
      if (IS_ASCII (character))
        RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
      put_byte (character, task);
    }

  TASK_RETURN (task);
}
/* Decode UTF-7 input of TASK and write it with put_ucs2.

   A byte order mark is written first when the input is not empty and
   task->byte_order_mark is set.  After a '+', base64 characters are
   decoded in groups: each base64 character contributes six bits through
   base64_char_to_value, and eight of them are assembled into three 16-bit
   values (6+6+4 bits, then 2+6+6+2 bits, then 4+6+6 bits), each written
   with put_ucs2 as soon as it is complete.  A base64 run that stops in the
   middle of a 16-bit value, or that stops on a value boundary while
   leaving non-zero spare bits, is reported as RECODE_INVALID_INPUT.  A
   decoded value for which IS_BODY_DIRECT holds is reported as
   RECODE_NOT_CANONICAL (presumably because it did not need base64 at
   all -- confirm against the macro definition).

   NOTE(review): in the lines visible here the function body is never
   closed and nothing handles bytes other than '+'; the rest of the
   definition appears to be missing from this excerpt.  */
static bool
transform_utf7_utf16 (RECODE_CONST_STEP step, RECODE_TASK task)
{
  int character;
  unsigned value;               /* 16-bit value being assembled */
  unsigned split;               /* base64 digit shared by two values */

  character = get_byte (task);
  if (character != EOF && task->byte_order_mark)
    put_ucs2 (BYTE_ORDER_MARK, task);

  while (character != EOF)
    if (character == '+')
      {
        character = get_byte (task);
        while (IS_BASE64 (character))
          {
            /* Process first byte of first quadruplet.  */
            value = base64_char_to_value[character] << 10;
            character = get_byte (task);

            /* Process second byte of first quadruplet.  */
            if (!IS_BASE64 (character))
              {
                RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            value |= base64_char_to_value[character] << 4;
            character = get_byte (task);

            /* Process third byte of first quadruplet.  Its top four bits
               complete the first value; its low two bits are kept in
               SPLIT for the second value.  */
            if (!IS_BASE64 (character))
              {
                RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            split = base64_char_to_value[character];
            value |= split >> 2;
            if (IS_BODY_DIRECT (value))
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
            put_ucs2 (value, task);
            character = get_byte (task);

            /* Process fourth byte of first quadruplet.  Stopping here is
               fine as long as the two spare bits are zero.  */
            if (!IS_BASE64 (character))
              {
                if (MASK (2) & split)
                  RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            value = ((MASK (2) & split) << 14
                     | base64_char_to_value[character] << 8);
            character = get_byte (task);

            /* Process first byte of second quadruplet.  */
            if (!IS_BASE64 (character))
              {
                RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            value |= base64_char_to_value[character] << 2;
            character = get_byte (task);

            /* Process second byte of second quadruplet.  Its top two bits
               complete the second value; its low four bits are kept in
               SPLIT for the third value.  */
            if (!IS_BASE64 (character))
              {
                RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            split = base64_char_to_value[character];
            value |= split >> 4;
            if (IS_BODY_DIRECT (value))
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
            put_ucs2 (value, task);
            character = get_byte (task);

            /* Process third byte of second quadruplet.  Stopping here is
               fine as long as the four spare bits are zero.  */
            if (!IS_BASE64 (character))
              {
                if (MASK (4) & split)
                  RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            value = ((MASK (4) & split) << 12
                     | base64_char_to_value[character] << 6);
            character = get_byte (task);

            /* Process fourth byte of second quadruplet.  */
            if (!IS_BASE64 (character))
              {
                RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
                break;
              }
            value |= base64_char_to_value[character];
            if (IS_BODY_DIRECT (value))
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
            put_ucs2 (value, task);
            character = get_byte (task);
          }

        /* A '-' closing the base64 run is swallowed.  It is reported as
           not canonical when the byte after it is not a base64
           character.  */
        if (character == '-')
          {
            character = get_byte (task);
            if (!IS_BASE64 (character))
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
          }
      }
/* Run the whole input of SUBTASK through the iconv descriptor CONVERSION.

   Input bytes are collected with get_byte into a buffer of BUFFER_SIZE
   bytes, converted with iconv into a second buffer of the same size, and
   the converted bytes are written with put_byte.  Input bytes which iconv
   did not consume are moved to the front of the input buffer and offered
   again on the next turn, after more input has been appended.  Once the
   input is exhausted, iconv is called with a NULL input to flush any
   pending shift state before the loop ends.

   Error handling, by errno value left by iconv:
   - E2BIG: not an error; the output buffer is flushed and the loop goes on.
   - EILSEQ: reported as RECODE_INVALID_INPUT; one input byte is skipped.
   - EINVAL at end of input: reported as RECODE_INVALID_INPUT; the
     incomplete trailing sequence is dropped.
   - anything else: reported through recode_perror as RECODE_SYSTEM_ERROR.

   RETURN_IF_NOGO (defined elsewhere) may leave the function at each
   report; otherwise the function finishes through SUBTASK_RETURN.  */
static bool
wrapped_transform (iconv_t conversion, RECODE_SUBTASK subtask)
{
  char output_buffer[BUFFER_SIZE];
  char input_buffer[BUFFER_SIZE];
  int input_char = get_byte (subtask);
  char *cursor = input_buffer;  /* end of pending input between turns */
  bool drain_first = false;

  while (true)
    {
      /* The output buffer is fully available at this point.  */

      char *input = input_buffer;
      char *output = output_buffer;
      size_t input_left = 0;
      size_t output_left = BUFFER_SIZE;
      int saved_errno = 0;
      size_t converted;

      if (drain_first)
        {
          /* Drain all accumulated partial state and emit output
             to return to the initial shift state.  */
          converted = iconv (conversion, NULL, NULL, &output, &output_left);
          if (converted == (size_t) -1)
            saved_errno = errno;
        }

      if (saved_errno == 0)
        {
          /* Continue filling the input buffer.  */
          while (input_char != EOF && cursor < input_buffer + BUFFER_SIZE)
            {
              *cursor++ = input_char;
              input_char = get_byte (subtask);
            }

          if (cursor == input_buffer)
            {
              if (output == output_buffer)
                {
                  /* All work has been done, just make sure we drained.  */
                  if (drain_first)
                    break;
                  drain_first = true;
                  continue;
                }
            }
          else
            {
              /* Convert accumulated input and add it to the output
                 buffer.  */
              input = input_buffer;
              input_left = cursor - input_buffer;
              converted = iconv (conversion,
                                 &input, &input_left,
                                 &output, &output_left);
              if (converted == (size_t) -1)
                saved_errno = errno;
            }
        }

      /* Send the converted result, so freeing the output buffer.  */
      for (cursor = output_buffer; cursor < output; cursor++)
        put_byte (*cursor, subtask);

      /* Act according to the outcome of the iconv call.  */

      drain_first = false;
      if (saved_errno != 0 && saved_errno != E2BIG)
        {
          if (saved_errno == EILSEQ)
            {
              /* Invalid input.  Skip one byte.  */
              RETURN_IF_NOGO (RECODE_INVALID_INPUT, subtask);
              assert (input_left > 0);
              input++;
              input_left--;
              /* Why is draining required?  */
              drain_first = true;
            }
          else if (saved_errno == EINVAL)
            {
              if (input + input_left < input_buffer + BUFFER_SIZE
                  && input_char == EOF)
                {
                  /* Incomplete multibyte sequence at end of input.  */
                  RETURN_IF_NOGO (RECODE_INVALID_INPUT, subtask);

                  /* We are told to go on despite the error.  No further
                     byte will ever complete this sequence, so discard it:
                     if it were moved back below, iconv would reject the
                     very same bytes on every following turn and the loop
                     would never end.  */
                  input += input_left;
                  input_left = 0;
                }
            }
          else
            {
              recode_perror (subtask->task->request->outer, "iconv ()");
              RETURN_IF_NOGO (RECODE_SYSTEM_ERROR, subtask);
            }
        }

      /* Move back any unprocessed part of the input buffer.  The regions
         may overlap, but bytes only ever move towards lower addresses, so
         a forward copy is safe.  */
      for (cursor = input_buffer; input_left != 0; input_left--)
        *cursor++ = *input++;
    }

  SUBTASK_RETURN (subtask);
}
/* Read the next 16-bit unit from the input of SUBTASK into *VALUE.

   Two bytes are fetched with get_byte and combined according to
   subtask->task->swap_input: first byte high for RECODE_SWAP_NO (and
   while still RECODE_SWAP_UNDECIDED), second byte high for
   RECODE_SWAP_YES.  Byte order marks are never delivered to the caller:

   - while undecided, BYTE_ORDER_MARK selects RECODE_SWAP_NO and
     BYTE_ORDER_MARK_SWAPPED selects RECODE_SWAP_YES; any other unit
     selects RECODE_SWAP_NO, is returned, and is reported as
     RECODE_NOT_CANONICAL when task->byte_order_mark is set;
   - once decided, a BYTE_ORDER_MARK is reported as RECODE_NOT_CANONICAL
     and skipped, and a BYTE_ORDER_MARK_SWAPPED additionally flips the
     swap state.

   Return true with *VALUE set when a unit was read.  Return false at end
   of input; if the input ends between the two bytes of a unit,
   RECODE_INVALID_INPUT is recorded first with SET_SUBTASK_ERROR.
   RETURN_IF_NOGO (defined elsewhere) may also leave the function.  */
bool
get_ucs2 (unsigned *value, RECODE_SUBTASK subtask)
{
  for (;;)
    {
      int first = get_byte (subtask);
      int second;
      unsigned unit;

      if (first == EOF)
        return false;

      second = get_byte (subtask);
      if (second == EOF)
        {
          /* Odd number of bytes: the last unit is truncated.  */
          SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
          return false;
        }

      switch (subtask->task->swap_input)
        {
        case RECODE_SWAP_UNDECIDED:
          unit = ((MASK (8) & first) << 8) | (MASK (8) & second);
          if (unit == BYTE_ORDER_MARK)
            {
              subtask->task->swap_input = RECODE_SWAP_NO;
            }
          else if (unit == BYTE_ORDER_MARK_SWAPPED)
            {
              subtask->task->swap_input = RECODE_SWAP_YES;
            }
          else
            {
              /* No mark at all: settle on unswapped input.  */
              *value = unit;
              subtask->task->swap_input = RECODE_SWAP_NO;
              if (subtask->task->byte_order_mark)
                RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
              return true;
            }
          break;

        case RECODE_SWAP_NO:
          unit = ((MASK (8) & first) << 8) | (MASK (8) & second);
          if (unit == BYTE_ORDER_MARK)
            {
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
            }
          else if (unit == BYTE_ORDER_MARK_SWAPPED)
            {
              subtask->task->swap_input = RECODE_SWAP_YES;
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
            }
          else
            {
              *value = unit;
              return true;
            }
          break;

        case RECODE_SWAP_YES:
          unit = ((MASK (8) & second) << 8) | (MASK (8) & first);
          if (unit == BYTE_ORDER_MARK)
            {
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
            }
          else if (unit == BYTE_ORDER_MARK_SWAPPED)
            {
              subtask->task->swap_input = RECODE_SWAP_NO;
              RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
            }
          else
            {
              *value = unit;
              return true;
            }
          break;
        }
    }
}
/* Two-character sequences accepted after ESCAPE on the Icon-QNX side,
   each with the IBM-PC byte written in its place.  */
static const struct
{
  char first;                   /* character right after ESCAPE */
  char second;                  /* character after that */
  int ibmpc;                    /* resulting IBM-PC byte */
}
iconqnx_escape_table[] =
  {
    {'A', 'a', 133}, {'A', 'e', 138}, {'A', 'u', 151},
    {'B', 'e', 130}, {'B', 'E', 144},
    {'C', 'a', 131}, {'C', 'e', 136}, {'C', 'i', 140},
    {'C', 'o', 147}, {'C', 'u', 150},
    {'H', 'e', 137}, {'H', 'i', 139}, {'H', 'u', 129},
    {'K', 'c', 135}, {'K', 'C', 128}
  };

/* Tell whether FIRST begins at least one entry of the escape table.  */
static bool
iconqnx_is_escape_letter (int first)
{
  size_t counter;

  for (counter = 0;
       counter < sizeof iconqnx_escape_table / sizeof iconqnx_escape_table[0];
       counter++)
    if (iconqnx_escape_table[counter].first == first)
      return true;
  return false;
}

/* Return the IBM-PC byte for the pair FIRST, SECOND, or -1 if the pair is
   not in the escape table.  */
static int
iconqnx_escape_code (int first, int second)
{
  size_t counter;

  for (counter = 0;
       counter < sizeof iconqnx_escape_table / sizeof iconqnx_escape_table[0];
       counter++)
    if (iconqnx_escape_table[counter].first == first
        && iconqnx_escape_table[counter].second == second)
      return iconqnx_escape_table[counter].ibmpc;
  return -1;
}

/* Recode a stream of Icon-QNX bytes towards IBM-PC.

   Bytes are pulled with get_byte and written with put_byte until EOF.
   ENDLINE becomes the pair DOS_CR DOS_LF.  A DOS_CR is copied; when it is
   directly followed by DOS_LF, RECODE_AMBIGUOUS_OUTPUT is reported first.
   ESCAPE followed by a known two-character pair becomes the matching
   IBM-PC byte.  An ESCAPE not followed by a known pair is reported as
   RECODE_INVALID_INPUT and the bytes read so far are copied as they came.
   Everything else is copied unchanged.

   The function leaves only through SUBTASK_RETURN, or through
   RETURN_IF_NOGO when that macro (defined elsewhere) decides to give
   up.  */
static bool
transform_iconqnx_ibmpc (RECODE_SUBTASK subtask)
{
  int input_char = get_byte (subtask);  /* current character */

  for (;;)
    {
      switch (input_char)
        {
        case EOF:
          SUBTASK_RETURN (subtask);

        case ENDLINE:
          put_byte (DOS_CR, subtask);
          put_byte (DOS_LF, subtask);
          input_char = get_byte (subtask);
          break;

        case DOS_CR:
          /* The byte read here is left for the next turn of the loop.  */
          input_char = get_byte (subtask);
          if (input_char == DOS_LF)
            RETURN_IF_NOGO (RECODE_AMBIGUOUS_OUTPUT, subtask);
          put_byte (DOS_CR, subtask);
          break;

        case ESCAPE:
          {
            int letter = get_byte (subtask);

            input_char = letter;
            if (iconqnx_is_escape_letter (letter))
              {
                int code;

                input_char = get_byte (subtask);
                code = iconqnx_escape_code (letter, input_char);
                if (code >= 0)
                  input_char = code;
                else
                  {
                    /* Known letter, unknown second character: give back
                       the ESCAPE and the letter, then let the second
                       character be copied below unless it is EOF.  */
                    RETURN_IF_NOGO (RECODE_INVALID_INPUT, subtask);
                    put_byte (ESCAPE, subtask);
                    put_byte (letter, subtask);
                    if (input_char == EOF)
                      SUBTASK_RETURN (subtask);
                  }
              }
            else
              {
                /* Unknown letter: give back the ESCAPE, then let the
                   letter be copied below unless it is EOF.  */
                RETURN_IF_NOGO (RECODE_INVALID_INPUT, subtask);
                put_byte (ESCAPE, subtask);
                if (input_char == EOF)
                  SUBTASK_RETURN (subtask);
              }
          }
          /* Fall through.  */
        default:
          put_byte (input_char, subtask);
          input_char = get_byte (subtask);
        }
    }
}