char * to_utf8(const char *src, char *dest) { unsigned char *p = (unsigned char *) dest; int len = 0, n; while ((n=get_unichar(&src)) > 0) { if (n >= 0x2000) { *p++ = 0xe0 | (n >> 12); *p++ = 0x80 | ((n >> 6) & 0x3f); *p++ = 0x80 | (n & 0x3f); } else if (n >= 0x80) {
/*
 * Extract the next speakable part of *msg for the Ivona synthesizer.
 *
 * conf        dumbtts configuration handed through to the dumbtts_* calls.
 * type        kind of message being processed; selects the branch below.
 * msg         in/out cursor into the message.  It is advanced past the
 *             consumed text, or set to NULL once the message is finished
 *             or cannot be processed.
 * icon        output buffer for a sound icon name.  It is cleared on entry
 *             and receives either the message itself (sound icon type,
 *             only when shorter than 63 characters) or "capital".
 *             NOTE(review): the 63 character limit suggests a 64 byte
 *             buffer - confirm against the callers.
 * buf, len    in/out text buffer and its size.  The buffer is grown with
 *             g_realloc() when a dumbtts_* call returns a positive value.
 * cap_mode    passed through to the character/key conversions.
 * delimeters  passed to module_get_message_part() when splitting text.
 * punct_mode, punct_some
 *             passed through to dumbtts_GetString().
 *
 * Returns 0 when a part was produced, non-zero otherwise.  For the KEY and
 * CHAR types the result of the last dumbtts_* call is returned unchanged.
 *
 * NOTE(review): a positive dumbtts_* result is treated here as the buffer
 * size required for a retry and zero as success - confirm with libdumbtts.
 */
int ivona_get_msgpart(struct dumbtts_conf *conf, SPDMessageType type,
		      char **msg, char *icon, char **buf, int *len,
		      int cap_mode, char *delimeters, int punct_mode,
		      char *punct_some)
{
	int rc;
	int isicon;
	int n, bytes;
	unsigned int pos;	/* same type the mbrpipe module passes to module_get_message_part() */
	wchar_t wc;
	char xbuf[1024];

	if (!*msg)
		return 1;
	if (!**msg)
		return 1;

	isicon = 0;
	icon[0] = 0;
	if (*buf)
		**buf = 0;
	log_msg(OTTS_LOG_INFO, "Ivona message %s type %d\n", *msg, type);

	switch (type) {
	case SPD_MSGTYPE_SOUND_ICON:
		/* The whole message is the icon name; it is consumed at once. */
		if (strlen(*msg) < 63) {
			strcpy(icon, *msg);
			rc = 0;
		} else {
			rc = 1;
		}
		*msg = NULL;
		return rc;

	case SPD_MSGTYPE_SPELL:
		/* One character per call; get_unichar() advances *msg. */
		wc = get_unichar(msg);
		if (!wc) {
			*msg = NULL;
			return 1;
		}
		n = dumbtts_WCharString(conf, wc, *buf, *len, cap_mode, &isicon);
		if (n > 0) {
			/* Buffer too small: grow it with some slack and retry. */
			*len = n + 128;
			*buf = g_realloc(*buf, *len);
			n = dumbtts_WCharString(conf, wc, *buf, *len, cap_mode,
						&isicon);
		}
		if (n) {
			*msg = NULL;
			return 1;
		}
		if (isicon)
			strcpy(icon, "capital");
		return 0;

	case SPD_MSGTYPE_KEY:
	case SPD_MSGTYPE_CHAR:
		if (type == SPD_MSGTYPE_KEY) {
			n = dumbtts_KeyString(conf, *msg, *buf, *len, cap_mode,
					      &isicon);
		} else {
			n = dumbtts_CharString(conf, *msg, *buf, *len, cap_mode,
					       &isicon);
		}
		log_msg(OTTS_LOG_INFO, "Got n=%d", n);
		if (n > 0) {
			/* Buffer too small: grow it with some slack and retry. */
			*len = n + 128;
			*buf = g_realloc(*buf, *len);
			if (type == SPD_MSGTYPE_KEY) {
				n = dumbtts_KeyString(conf, *msg, *buf, *len,
						      cap_mode, &isicon);
			} else {
				n = dumbtts_CharString(conf, *msg, *buf, *len,
						       cap_mode, &isicon);
			}
		}
		/* Key and character messages are always consumed in one go. */
		*msg = NULL;
		if (!n && isicon)
			strcpy(icon, "capital");
		return n;

	case SPD_MSGTYPE_TEXT:
		pos = 0;
		bytes = module_get_message_part(*msg, xbuf, &pos, 1023,
						delimeters);
		log_msg(OTTS_LOG_DEBUG, "Got bytes %d, %s", bytes, xbuf);
		if (bytes <= 0) {
			*msg = NULL;
			return 1;
		}
		*msg += pos;
		xbuf[bytes] = 0;
		n = dumbtts_GetString(conf, xbuf, *buf, *len, punct_mode,
				      punct_some, ",.;:!?");
		if (n > 0) {
			/* Buffer too small: grow it with some slack and retry. */
			*len = n + 128;
			*buf = g_realloc(*buf, *len);
			n = dumbtts_GetString(conf, xbuf, *buf, *len,
					      punct_mode, punct_some, ",.;:!?");
		}
		if (n) {
			*msg = NULL;
			return 1;
		}
		log_msg(OTTS_LOG_INFO, "Returning to Ivona |%s|", *buf);
		return 0;

	default:
		*msg = NULL;
		log_msg(OTTS_LOG_WARN, "Unknown message type\n");
		return 1;
	}
}
/*
 * Extract the next speakable part of *msg for the mbrpipe module.
 *
 * conf        handle passed through to the dumbtts_* calls.
 * type        kind of message being processed; only consulted when the
 *             global dumb_conf is set.
 * msg         in/out cursor into the message.  It is advanced past the
 *             consumed text, or set to NULL once the message is finished
 *             or cannot be processed.
 * icon        output buffer for a sound icon name.  It is cleared on entry
 *             and receives either the message itself (sound icon type,
 *             only when shorter than 63 characters) or "capital".
 * buf, len    in/out text buffer and its size.  With dumbtts the buffer is
 *             grown with g_realloc() as needed and *len is its size;
 *             without dumbtts the old buffer is freed, a new one is
 *             allocated and *len is the string length of its contents.
 * cap_mode    passed through to the character/key conversions.
 * delimiters  passed to module_get_message_part() when splitting text.
 * punct_mode, punct_some
 *             passed through to dumbtts_GetString().
 *
 * Returns 0 when a part was produced, non-zero otherwise.  For the KEY and
 * CHAR types the result of the last dumbtts_* call is returned unchanged.
 *
 * NOTE(review): a positive dumbtts_* result is treated here as the buffer
 * size required for a retry and zero as success - confirm with libdumbtts.
 */
int mbrpipe_get_msgpart(void *conf, SPDMessageType type, char **msg,
			char *icon, char **buf, int *len, int cap_mode,
			char *delimiters, int punct_mode, char *punct_some)
{
	int rc;
	int isicon;
	int n, bytes;
	unsigned int pos;
	wchar_t wc;
	char xbuf[1024];

	if (!*msg)
		return 1;
	if (!**msg)
		return 1;

	isicon = 0;
	icon[0] = 0;
	if (*buf)
		**buf = 0;
	log_msg(OTTS_LOG_DEBUG, MODULE_NAME": message %s type %d\n", *msg, type);

	if (!dumb_conf) {	/* No dumbtts library? */
		/*
		 * Without dumbtts every message type is handled as plain
		 * text: cut the next part off and hand it over, converted
		 * to the synthesizer encoding when that is not UTF-8.
		 */
		pos = 0;
		bytes = module_get_message_part(*msg, xbuf, &pos, 1023,
						delimiters);
		log_msg(OTTS_LOG_DEBUG, MODULE_NAME": Got bytes %d, %s", bytes,
			xbuf);
		if (bytes <= 0) {
			*msg = NULL;
			return 1;
		}
		*msg += pos;
		xbuf[bytes] = 0;
		g_free(*buf);
		if (strcasecmp(MbrpipeSynthEncoding, "utf-8")) {
			*buf = g_convert(xbuf, -1, MbrpipeSynthEncoding, "UTF-8",
					 NULL, NULL, NULL);
			if (!*buf) {
				/*
				 * g_convert() returns NULL when the text
				 * cannot be converted; give up on the
				 * message instead of calling strlen(NULL).
				 */
				log_msg(OTTS_LOG_WARN,
					MODULE_NAME": Cannot convert message part to %s\n",
					MbrpipeSynthEncoding);
				*len = 0;
				*msg = NULL;
				return 1;
			}
			*len = strlen(*buf);
		} else {
			*len = strlen(xbuf);
			*buf = g_strdup(xbuf);
		}
		return 0;
	}

	switch (type) {
	case SPD_MSGTYPE_SOUND_ICON:
		/* The whole message is the icon name; it is consumed at once. */
		if (strlen(*msg) < 63) {
			strcpy(icon, *msg);
			rc = 0;
		} else {
			rc = 1;
		}
		*msg = NULL;
		return rc;

	case SPD_MSGTYPE_SPELL:
		/* One character per call; get_unichar() advances *msg. */
		wc = get_unichar(msg);
		if (!wc) {
			*msg = NULL;
			return 1;
		}
		n = dumbtts_WCharString(conf, wc, *buf, *len, cap_mode, &isicon);
		if (n > 0) {
			/* Buffer too small: grow it with some slack and retry. */
			*len = n + 128;
			*buf = g_realloc(*buf, *len);
			n = dumbtts_WCharString(conf, wc, *buf, *len, cap_mode,
						&isicon);
		}
		if (n) {
			*msg = NULL;
			return 1;
		}
		if (isicon)
			strcpy(icon, "capital");
		return 0;

	case SPD_MSGTYPE_KEY:
	case SPD_MSGTYPE_CHAR:
		if (type == SPD_MSGTYPE_KEY) {
			n = dumbtts_KeyString(conf, *msg, *buf, *len, cap_mode,
					      &isicon);
		} else {
			n = dumbtts_CharString(conf, *msg, *buf, *len, cap_mode,
					       &isicon);
		}
		log_msg(OTTS_LOG_DEBUG, MODULE_NAME": Got n=%d", n);
		if (n > 0) {
			/* Buffer too small: grow it with some slack and retry. */
			*len = n + 128;
			*buf = g_realloc(*buf, *len);
			if (type == SPD_MSGTYPE_KEY) {
				n = dumbtts_KeyString(conf, *msg, *buf, *len,
						      cap_mode, &isicon);
			} else {
				n = dumbtts_CharString(conf, *msg, *buf, *len,
						       cap_mode, &isicon);
			}
		}
		/* Key and character messages are always consumed in one go. */
		*msg = NULL;
		if (!n && isicon)
			strcpy(icon, "capital");
		return n;

	case SPD_MSGTYPE_TEXT:
		pos = 0;
		bytes = module_get_message_part(*msg, xbuf, &pos, 1023,
						delimiters);
		log_msg(OTTS_LOG_DEBUG, MODULE_NAME": Got bytes %d, %s", bytes,
			xbuf);
		if (bytes <= 0) {
			*msg = NULL;
			return 1;
		}
		*msg += pos;
		xbuf[bytes] = 0;
		n = dumbtts_GetString(conf, xbuf, *buf, *len, punct_mode,
				      punct_some, ",.;:!?'");
		if (n > 0) {
			/* Buffer too small: grow it with some slack and retry. */
			*len = n + 128;
			*buf = g_realloc(*buf, *len);
			n = dumbtts_GetString(conf, xbuf, *buf, *len,
					      punct_mode, punct_some, ",.;:!?'");
		}
		if (n) {
			*msg = NULL;
			return 1;
		}
		log_msg(OTTS_LOG_DEBUG, MODULE_NAME ": Returning to translator |%s|", *buf);
		return 0;

	default:
		*msg = NULL;
		log_msg(OTTS_LOG_WARN, MODULE_NAME": Unknown message type\n");
		return 1;
	}
}
static int milena_get_msgpart(char **msg,char *icon,char **buf,int *phmode) { int rc,wc,nlen,iscap,*iscp;char *c; if (!*msg || !**msg) return 0; iscp=NULL; iscap=0; log_msg(OTTS_LOG_DEBUG, MODULE_NAME": CAP %d", milena_cap_mode); if (milena_cap_mode == 1) iscp=&iscap; switch(milena_message_type) { case SPD_MSGTYPE_SOUND_ICON: if (strlen(*msg)<63) { strcpy(icon,*msg); rc=1; } else { rc=0; } *msg=NULL; return rc; case SPD_MSGTYPE_SPELL: wc=get_unichar(msg); if (!wc) { *msg=NULL; return 0; } nlen=milena_wchar(milena,wc,NULL,0,iscp); if (nlen>0) { *buf=malloc(nlen); milena_wchar(milena,wc,*buf,nlen,iscp); *phmode=16; } else { *buf=strdup("b\xb3\261d"); *phmode=2; } *phmode=(**msg)?1:16; if (iscap) strcpy(icon,"capital"); return 1; case SPD_MSGTYPE_KEY: nlen=milena_key(milena,*msg,NULL,0,iscp); if (nlen>0) { *buf=malloc(nlen); milena_key(milena,*msg,*buf,nlen,iscp); *phmode=16; } else { *buf=strdup("b\xb3\261d"); *phmode=2; } *msg=NULL; if (iscap) strcpy(icon,"capital"); return 1; case SPD_MSGTYPE_CHAR: log_msg(OTTS_LOG_DEBUG, MODULE_NAME": CHAR [%s]\n",*msg); wc=get_unichar(msg); nlen=milena_wchar(milena,wc,NULL,0,iscp); if (nlen>0) { *buf=malloc(nlen); milena_wchar(milena,wc,*buf,nlen,iscp); *phmode=16; } else { *buf=strdup("b\xb3\261d"); *phmode=2; } *msg=NULL; if (iscap) strcpy(icon,"capital"); return 1; case SPD_MSGTYPE_TEXT: c=*msg; nlen=milena_GetPhraseWithPunct(milena,msg,NULL,0,phmode,milena_punct,MilenaPunctuationSome); if (nlen<=0) { *msg=NULL; return 0; } *msg=c; *buf=malloc(nlen+1); milena_GetPhraseWithPunct(milena,msg,*buf,nlen+1,phmode,milena_punct,MilenaPunctuationSome); return 1; default: *msg=NULL; log_msg(OTTS_LOG_WARN, MODULE_NAME": Unknown message type\n"); return 0; } return 0; }
/* * The scanner * */ int mcy_lex(void) { static const WCHAR ustr_dot1[] = { '.', '\n', 0 }; static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 }; static int isinit = 0; int ch; if(!isinit) { isinit++; set_codepage(WMC_DEFAULT_CODEPAGE); add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0); add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1); add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1); add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1); add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1); add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1); add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1); add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1); add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1); add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1); add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1); add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0); add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0); add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0); add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0); add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0); add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0); add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0); } empty_unichar_stack(); while(1) { if(want_line) { while((ch = get_unichar()) != '\n') { if(ch == EOF) xyyerror("Unexpected EOF\n"); push_unichar(ch); } newline(); push_unichar(ch); push_unichar(0); if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack())) { want_line = 0; /* Reset the codepage to our default after each message */ set_codepage(WMC_DEFAULT_CODEPAGE); return tMSGEND; } mcy_lval.str = xunistrdup(get_unichar_stack()); return tLINE; } ch = get_unichar(); if(ch == EOF) return EOF; if(ch == '\n') { newline(); if(want_nl) { want_nl = 0; return tNL; } continue; } if(isisochar(ch)) { 
if(want_file) { int n = 0; while(n < 8 && isisochar(ch)) { int t = char_table[ch]; if((t & CH_PUNCT) || !(t & CH_SHORTNAME)) break; push_unichar(ch); n++; ch = get_unichar(); } unget_unichar(ch); push_unichar(0); want_file = 0; mcy_lval.str = xunistrdup(get_unichar_stack()); return tFILE; } if(char_table[ch] & CH_IDENT) { token_t *tok; while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER))) { push_unichar(ch); ch = get_unichar(); } unget_unichar(ch); push_unichar(0); if(!(tok = lookup_token(get_unichar_stack()))) { mcy_lval.str = xunistrdup(get_unichar_stack()); return tIDENT; } switch(tok->type) { case tok_keyword: return tok->token; case tok_language: codepage = tok->codepage; /* Fall through */ case tok_severity: case tok_facility: mcy_lval.tok = tok; return tTOKEN; default: internal_error(__FILE__, __LINE__, "Invalid token type encountered\n"); } } if(isspace(ch)) /* Ignore space */ continue; if(isdigit(ch)) return scan_number(ch); } switch(ch) { case ':': case '=': case '+': case '(': case ')': return ch; case ';': while(ch != '\n' && ch != EOF) { push_unichar(ch); ch = get_unichar(); } newline(); push_unichar(ch); /* Include the newline */ push_unichar(0); mcy_lval.str = xunistrdup(get_unichar_stack()); return tCOMMENT; default: xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch); } } }
/* * Number scanner * * state | ch | next state * ------+-----------------+-------------------------- * 0 | [0] | 1 * 0 | [1-9] | 4 * 0 | . | error (should never occur) * 1 | [xX] | 2 * 1 | [0-7] | 3 * 1 | [89a-wyzA-WYZ_] | error invalid digit * 1 | . | return 0 * 2 | [0-9a-fA-F] | 2 * 2 | [g-zG-Z_] | error invalid hex digit * 2 | . | return (hex-number) if TOS != [xX] else error * 3 | [0-7] | 3 * 3 | [89a-zA-Z_] | error invalid octal digit * 3 | . | return (octal-number) * 4 | [0-9] | 4 * 4 | [a-zA-Z_] | error invalid decimal digit * 4 | . | return (decimal-number) * * All non-identifier characters [^a-zA-Z_0-9] terminate the scan * and return the value. This is not entirely correct, but close * enough (should check punctuators as trailing context, but the * char_table is not adapted to that and it is questionable whether * it is worth the trouble). * All non-iso-8859-1 characters are an error. */ static int scan_number(int ch) { int state = 0; int base = 10; empty_char_stack(); while(1) { if(!isisochar(ch)) xyyerror("Invalid digit\n"); switch(state) { case 0: if(isdigit(ch)) { push_char(ch); if(ch == '0') state = 1; else state = 4; } else internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n"); break; case 1: if(ch == 'x' || ch == 'X') { push_char(ch); state = 2; } else if(ch >= '0' && ch <= '7') { push_char(ch); state = 3; } else if(isalpha(ch) || ch == '_') xyyerror("Invalid number digit\n"); else { unget_unichar(ch); mcy_lval.num = 0; return tNUMBER; } break; case 2: if(isxdigit(ch)) push_char(ch); else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack())) xyyerror("Invalid hex digit\n"); else { base = 16; goto finish; } break; case 3: if(ch >= '0' && ch <= '7') push_char(ch); else if(isalnum(ch) || ch == '_') xyyerror("Invalid octal digit\n"); else { base = 8; goto finish; } break; case 4: if(isdigit(ch)) push_char(ch); else if(isalnum(ch) || ch == '_') xyyerror("Invalid decimal digit\n"); else { base = 10; goto finish; } break; 
default: internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n"); } ch = get_unichar(); } finish: unget_unichar(ch); push_char(0); mcy_lval.num = strtoul(get_char_stack(), NULL, base); return tNUMBER; }