espeak_ng_STATUS sync_espeak_Key(const char *key) { // symbolic name, symbolicname_character - is there a system resource of symbolic names per language? int letter; int ix; ix = utf8_in(&letter, key); if (key[ix] == 0) // a single character return sync_espeak_Char(letter); my_unique_identifier = 0; my_user_data = NULL; return Synthesize(0, key, 0); // speak key as a text string }
int main(int argc, char **argv) { static struct option long_options[] = { { "help", no_argument, 0, 'h' }, { "stdin", no_argument, 0, 0x100 }, { "compile-debug", optional_argument, 0, 0x101 }, { "compile", optional_argument, 0, 0x102 }, { "punct", optional_argument, 0, 0x103 }, { "voices", optional_argument, 0, 0x104 }, { "stdout", no_argument, 0, 0x105 }, { "split", optional_argument, 0, 0x106 }, { "path", required_argument, 0, 0x107 }, { "phonout", required_argument, 0, 0x108 }, { "pho", no_argument, 0, 0x109 }, { "ipa", optional_argument, 0, 0x10a }, { "version", no_argument, 0, 0x10b }, { "sep", optional_argument, 0, 0x10c }, { "tie", optional_argument, 0, 0x10d }, { "compile-mbrola", optional_argument, 0, 0x10e }, { "compile-intonations", no_argument, 0, 0x10f }, { "compile-phonemes", optional_argument, 0, 0x110 }, { 0, 0, 0, 0 } }; FILE *f_text = NULL; char *p_text = NULL; FILE *f_phonemes_out = stdout; char *data_path = NULL; // use default path for espeak-ng-data int option_index = 0; int c; int ix; char *optarg2; int value; int flag_stdin = 0; int flag_compile = 0; int filesize = 0; int synth_flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE; int volume = -1; int speed = -1; int pitch = -1; int wordgap = -1; int option_capitals = -1; int option_punctuation = -1; int phonemes_separator = 0; int phoneme_options = 0; int option_linelength = 0; int option_waveout = 0; espeak_VOICE voice_select; char filename[200]; char voicename[40]; char devicename[200]; #define N_PUNCTLIST 100 wchar_t option_punctlist[N_PUNCTLIST]; voicename[0] = 0; wavefile[0] = 0; filename[0] = 0; devicename[0] = 0; option_punctlist[0] = 0; while (true) { c = getopt_long(argc, argv, "a:b:d:f:g:hk:l:mp:qs:v:w:xXz", long_options, &option_index); // Detect the end of the options. if (c == -1) break; optarg2 = optarg; switch (c) { case 'b': // input character encoding, 8bit, 16bit, UTF8 if ((sscanf(optarg2, "%d", &value) == 1) && (value <= 4)) synth_flags |= value; else synth_flags |= espeakCHARS_8BIT; break; case 'd': strncpy0(devicename, optarg2, sizeof(devicename)); break; case 'h': printf("\n"); PrintVersion(); printf("%s", help_text); return 0; case 'k': option_capitals = atoi(optarg2); break; case 'x': phoneme_options |= espeakPHONEMES_SHOW; break; case 'X': phoneme_options |= espeakPHONEMES_TRACE; break; case 'm': synth_flags |= espeakSSML; break; case 'p': pitch = atoi(optarg2); break; case 'q': quiet = true; break; case 'f': strncpy0(filename, optarg2, sizeof(filename)); break; case 'l': option_linelength = atoi(optarg2); break; case 'a': volume = atoi(optarg2); break; case 's': speed = atoi(optarg2); break; case 'g': wordgap = atoi(optarg2); break; case 'v': strncpy0(voicename, optarg2, sizeof(voicename)); break; case 'w': option_waveout = 1; strncpy0(wavefile, optarg2, sizeof(filename)); break; case 'z': // remove pause from the end of a sentence synth_flags &= ~espeakENDPAUSE; break; case 0x100: // --stdin flag_stdin = 1; break; case 0x105: // --stdout option_waveout = 1; strcpy(wavefile, "stdout"); break; case 0x101: // --compile-debug case 0x102: // --compile if (optarg2 != NULL && *optarg2) { strncpy0(voicename, optarg2, sizeof(voicename)); flag_compile = c; quiet = true; break; } else { fprintf(stderr, "Voice name to '%s' not specified.\n", c == 0x101 ? "--compile-debug" : "--compile"); exit(EXIT_FAILURE); } case 0x103: // --punct option_punctuation = 1; if (optarg2 != NULL) { ix = 0; while ((ix < N_PUNCTLIST) && ((option_punctlist[ix] = optarg2[ix]) != 0)) ix++; option_punctlist[N_PUNCTLIST-1] = 0; option_punctuation = 2; } break; case 0x104: // --voices espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, data_path, 0); DisplayVoices(stdout, optarg2); exit(0); case 0x106: // -- split if (optarg2 == NULL) samples_split_seconds = 30 * 60; // default 30 minutes else samples_split_seconds = atoi(optarg2) * 60; break; case 0x107: // --path data_path = optarg2; break; case 0x108: // --phonout if ((f_phonemes_out = fopen(optarg2, "w")) == NULL) fprintf(stderr, "Can't write to: %s\n", optarg2); break; case 0x109: // --pho phoneme_options |= espeakPHONEMES_MBROLA; break; case 0x10a: // --ipa phoneme_options |= espeakPHONEMES_IPA; if (optarg2 != NULL) { // deprecated and obsolete switch (atoi(optarg2)) { case 1: phonemes_separator = '_'; break; case 2: phonemes_separator = 0x0361; phoneme_options |= espeakPHONEMES_TIE; break; case 3: phonemes_separator = 0x200d; // ZWJ phoneme_options |= espeakPHONEMES_TIE; break; } } break; case 0x10b: // --version PrintVersion(); exit(0); case 0x10c: // --sep phoneme_options |= espeakPHONEMES_SHOW; if (optarg2 == 0) phonemes_separator = ' '; else utf8_in(&phonemes_separator, optarg2); if (phonemes_separator == 'z') phonemes_separator = 0x200c; // ZWNJ break; case 0x10d: // --tie phoneme_options |= (espeakPHONEMES_SHOW | espeakPHONEMES_TIE); if (optarg2 == 0) phonemes_separator = 0x0361; // default: combining-double-inverted-breve else utf8_in(&phonemes_separator, optarg2); if (phonemes_separator == 'z') phonemes_separator = 0x200d; // ZWJ break; case 0x10e: // --compile-mbrola { espeak_ng_InitializePath(data_path); espeak_ng_ERROR_CONTEXT context = NULL; espeak_ng_STATUS result = espeak_ng_CompileMbrolaVoice(optarg2, stdout, &context); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, context); espeak_ng_ClearErrorContext(&context); return EXIT_FAILURE; } return EXIT_SUCCESS; } case 0x10f: // --compile-intonations { espeak_ng_InitializePath(data_path); espeak_ng_ERROR_CONTEXT context = NULL; espeak_ng_STATUS result = espeak_ng_CompileIntonation(stdout, &context); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, context); espeak_ng_ClearErrorContext(&context); return EXIT_FAILURE; } return EXIT_SUCCESS; } case 0x110: // --compile-phonemes { espeak_ng_InitializePath(data_path); espeak_ng_ERROR_CONTEXT context = NULL; espeak_ng_STATUS result; if (optarg2) { result = espeak_ng_CompilePhonemeDataPath(22050, optarg2, NULL, stdout, &context); } else { result = espeak_ng_CompilePhonemeData(22050, stdout, &context); } if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, context); espeak_ng_ClearErrorContext(&context); return EXIT_FAILURE; } return EXIT_SUCCESS; } default: exit(0); } } espeak_ng_InitializePath(data_path); espeak_ng_ERROR_CONTEXT context = NULL; espeak_ng_STATUS result = espeak_ng_Initialize(&context); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, context); espeak_ng_ClearErrorContext(&context); exit(1); } if (option_waveout || quiet) { // writing to a file (or no output), we can use synchronous mode result = espeak_ng_InitializeOutput(ENOUTPUT_MODE_SYNCHRONOUS, 0, devicename[0] ? devicename : NULL); samplerate = espeak_ng_GetSampleRate(); samples_split = samplerate * samples_split_seconds; espeak_SetSynthCallback(SynthCallback); if (samples_split) { char *extn; extn = strrchr(wavefile, '.'); if ((extn != NULL) && ((wavefile + strlen(wavefile) - extn) <= 4)) { strcpy(filetype, extn); *extn = 0; } } } else { // play the sound output result = espeak_ng_InitializeOutput(PLAYBACK_MODE, 0, devicename[0] ? devicename : NULL); samplerate = espeak_ng_GetSampleRate(); } if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, NULL); exit(EXIT_FAILURE); } if (voicename[0] == 0) strcpy(voicename, ESPEAKNG_DEFAULT_VOICE); result = espeak_ng_SetVoiceByName(voicename); if (result != ENS_OK) { memset(&voice_select, 0, sizeof(voice_select)); voice_select.languages = voicename; result = espeak_ng_SetVoiceByProperties(&voice_select); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, NULL); exit(EXIT_FAILURE); } } if (flag_compile) { // This must be done after the voice is set espeak_ng_ERROR_CONTEXT context = NULL; espeak_ng_STATUS result = espeak_ng_CompileDictionary("", NULL, stderr, flag_compile & 0x1, &context); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, context); espeak_ng_ClearErrorContext(&context); return EXIT_FAILURE; } return EXIT_SUCCESS; } // set any non-default values of parameters. This must be done after espeak_Initialize() if (speed > 0) espeak_SetParameter(espeakRATE, speed, 0); if (volume >= 0) espeak_SetParameter(espeakVOLUME, volume, 0); if (pitch >= 0) espeak_SetParameter(espeakPITCH, pitch, 0); if (option_capitals >= 0) espeak_SetParameter(espeakCAPITALS, option_capitals, 0); if (option_punctuation >= 0) espeak_SetParameter(espeakPUNCTUATION, option_punctuation, 0); if (wordgap >= 0) espeak_SetParameter(espeakWORDGAP, wordgap, 0); if (option_linelength > 0) espeak_SetParameter(espeakLINELENGTH, option_linelength, 0); if (option_punctuation == 2) espeak_SetPunctuationList(option_punctlist); espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out); if (filename[0] == 0) { if ((optind < argc) && (flag_stdin == 0)) { // there's a non-option parameter, and no -f or --stdin // use it as text p_text = argv[optind]; } else { f_text = stdin; if (flag_stdin == 0) flag_stdin = 2; } } else { struct stat st; if (stat(filename, &st) != 0) { fprintf(stderr, "Failed to stat() file '%s'\n", filename); exit(EXIT_FAILURE); } filesize = GetFileLength(filename); f_text = fopen(filename, "r"); if (f_text == NULL) { fprintf(stderr, "Failed to read file '%s'\n", filename); exit(EXIT_FAILURE); } if (S_ISFIFO(st.st_mode)) { flag_stdin = 2; } } if (p_text != NULL) { int size; size = strlen(p_text); espeak_Synth(p_text, size+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL); } else if (flag_stdin) { size_t max = 1000; if ((p_text = (char *)malloc(max)) == NULL) { espeak_ng_PrintStatusCodeMessage(ENOMEM, stderr, NULL); exit(EXIT_FAILURE); } if (flag_stdin == 2) { // line by line input on stdin or from FIFO while (fgets(p_text, max, f_text) != NULL) { p_text[max-1] = 0; espeak_Synth(p_text, max, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL); // Allow subprocesses to use the audio data through pipes. fflush(stdout); } if (f_text != stdin) { fclose(f_text); } } else { // bulk input on stdin ix = 0; while (true) { if ((c = fgetc(stdin)) == EOF) break; p_text[ix++] = (char)c; if (ix >= (max-1)) { char *new_text = NULL; if (max <= SIZE_MAX - 1000) { max += 1000; new_text = (char *)realloc(p_text, max); } if (new_text == NULL) { free(p_text); espeak_ng_PrintStatusCodeMessage(ENOMEM, stderr, NULL); exit(EXIT_FAILURE); } p_text = new_text; } } if (ix > 0) { p_text[ix-1] = 0; espeak_Synth(p_text, ix+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL); } } free(p_text); } else if (f_text != NULL) { if ((p_text = (char *)malloc(filesize+1)) == NULL) { espeak_ng_PrintStatusCodeMessage(ENOMEM, stderr, NULL); exit(EXIT_FAILURE); } fread(p_text, 1, filesize, f_text); p_text[filesize] = 0; espeak_Synth(p_text, filesize+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL); fclose(f_text); free(p_text); } result = espeak_ng_Synchronize(); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, NULL); exit(EXIT_FAILURE); } if (f_phonemes_out != stdout) fclose(f_phonemes_out); CloseWavFile(); espeak_ng_Terminate(); return 0; }
static char *compile_rule(char *input) {//=================================== int ix; unsigned char c; int wc; char *p; char *prule; int len; int len_name; int state=2; int finish=0; int pre_bracket=0; char buf[80]; char output[150]; unsigned char bad_phoneme[4]; buf[0]=0; rule_cond[0]=0; rule_pre[0]=0; rule_post[0]=0; rule_match[0]=0; rule_phonemes[0]=0; p = buf; for(ix=0; finish==0; ix++) { c = input[ix]; switch(c = input[ix]) { case ')': // end of prefix section *p = 0; state = 1; pre_bracket = 1; copy_rule_string(buf,state); p = buf; break; case '(': // start of suffix section *p = 0; state = 2; copy_rule_string(buf,state); state = 3; p = buf; break; case '\n': // end of line case '\r': case 0: // end of line *p = 0; copy_rule_string(buf,state); finish=1; break; case '\t': // end of section section case ' ': *p = 0; copy_rule_string(buf,state); p = buf; break; case '?': if(state==2) state=0; else *p++ = c; break; default: *p++ = c; break; } } if(strcmp(rule_match,"$group")==0) strcpy(rule_match,group_name); if(rule_match[0]==0) return(NULL); EncodePhonemes(rule_phonemes,buf,bad_phoneme); for(ix=0;; ix++) { if((c = buf[ix])==0) break; if(c==255) { fprintf(f_log,"%5d: Bad phoneme [%c] in %s",linenum,bad_phoneme[0],input); error_count++; break; } } strcpy(output,buf); len = strlen(buf)+1; len_name = strlen(group_name); if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0)) { utf8_in(&wc,rule_match); if((group_name[0] == '9') && IsDigit(wc)) { // numeric group, rule_match starts with a digit, so OK } else { fprintf(f_log,"%5d: Wrong initial letters '%s' for group '%s'\n",linenum,rule_match,group_name); error_count++; } } strcpy(&output[len],rule_match); len += strlen(rule_match); if(debug_flag) { output[len] = RULE_LINENUM; output[len+1] = (linenum % 255) + 1; output[len+2] = (linenum / 255) + 1; len+=3; } if(rule_cond[0] != 0) { ix = -1; if(rule_cond[0] == '!') { // allow the rule only if the condition number is NOT set for the voice ix = atoi(&rule_cond[1]) + 32; } else { // allow the rule only if the condition number is set for the voice ix = atoi(rule_cond); } if((ix > 0) && (ix < 255)) { output[len++] = RULE_CONDITION; output[len++] = ix; } else { fprintf(f_log,"%5d: bad condition number ?%d\n",linenum,ix); error_count++; } } if(rule_pre[0] != 0) { output[len++] = RULE_PRE; // output PRE string in reverse order for(ix = strlen(rule_pre)-1; ix>=0; ix--) output[len++] = rule_pre[ix]; } if(rule_post[0] != 0) { sprintf(&output[len],"%c%s",RULE_POST,rule_post); len += (strlen(rule_post)+1); } output[len++]=0; prule = (char *)malloc(len); memcpy(prule,output,len); return(prule); } // end of compile_rule
static int compile_line(char *linebuf, char *dict_line, int *hash) {//=============================================================== // Compile a line in the language_list file unsigned char c; char *p; char *word; char *phonetic; unsigned int ix; int step; unsigned int n_flag_codes = 0; int flag_offset; int length; int multiple_words = 0; char *multiple_string = NULL; char *multiple_string_end = NULL; int len_word; int len_phonetic; int text_not_phonemes; // this word specifies replacement text, not phonemes unsigned int wc; int all_upper_case; char *mnemptr; char *comment; unsigned char flag_codes[100]; char encoded_ph[200]; unsigned char bad_phoneme[4]; static char nullstring[] = {0}; comment = NULL; text_not_phonemes = 0; phonetic = word = nullstring; if(memcmp(linebuf,"_-",2)==0) { step=1; // TEST } p = linebuf; // while(isspace2(*p)) p++; #ifdef deleted if(*p == '$') { if(memcmp(p,"$textmode",9) == 0) { text_mode = 1; return(0); } if(memcmp(p,"$phonememode",12) == 0) { text_mode = 0; return(0); } } #endif step = 0; c = 0; while(c != '\n') { c = *p; if((c == '?') && (step==0)) { // conditional rule, allow only if the numbered condition is set for the voice flag_offset = 100; p++; if(*p == '!') { // allow only if the numbered condition is NOT set flag_offset = 132; p++; } ix = 0; if(isdigit(*p)) { ix += (*p-'0'); p++; } if(isdigit(*p)) { ix = ix*10 + (*p-'0'); p++; } flag_codes[n_flag_codes++] = ix + flag_offset; c = *p; } if((c == '$') && isalnum(p[1])) { /* read keyword parameter */ mnemptr = p; while(!isspace2(c = *p)) p++; *p = 0; ix = LookupMnem(mnem_flags,mnemptr); if(ix > 0) { if(ix == 200) { text_mode = 1; } else if(ix == 201) { text_mode = 0; } else if(ix == BITNUM_FLAG_TEXTMODE) { text_not_phonemes = 1; } else { flag_codes[n_flag_codes++] = ix; } } else { fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); error_count++; } } if((c == '/') && (p[1] == '/') && (multiple_words==0)) { c = '\n'; /* "//" treat comment as end of line */ comment = p; } switch(step) { case 0: if(c == '(') { multiple_words = 1; word = p+1; step = 1; } else if(!isspace2(c)) { word = p; step = 1; } break; case 1: if((c == '-') && (word[0] != '_')) { flag_codes[n_flag_codes++] = BITNUM_FLAG_HYPHENATED; c = ' '; } if(isspace2(c)) { p[0] = 0; /* terminate english word */ if(multiple_words) { multiple_string = multiple_string_end = p+1; step = 2; } else { step = 3; } } else if((c == ')') && multiple_words) { p[0] = 0; step = 3; multiple_words = 0; } break; case 2: if(isspace2(c)) { multiple_words++; } else if(c == ')') { p[0] = ' '; // terminate extra string multiple_string_end = p+1; step = 3; } break; case 3: if(!isspace2(c)) { phonetic = p; step = 4; } break; case 4: if(isspace2(c)) { p[0] = 0; /* terminate phonetic */ step = 5; } break; case 5: break; } p++; } if(word[0] == 0) { #ifdef OPT_FORMAT if(comment != NULL) fprintf(f_log,"%s",comment); else fputc('\n',f_log); #endif return(0); /* blank line */ } if(text_mode) text_not_phonemes = 1; if(text_not_phonemes != translator->langopts.textmode) { flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; } if(text_not_phonemes) { // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); } else { EncodePhonemes(phonetic,encoded_ph,bad_phoneme); if(strchr(encoded_ph,phonSWITCH) != 0) { flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages } // check for errors in the phonemes codes for(ix=0; ix<sizeof(encoded_ph); ix++) { c = encoded_ph[ix]; if(c == 0) break; if(c == 255) { /* unrecognised phoneme, report error */ fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); error_count++; } } } if(sscanf(word,"U+%x",&wc) == 1) { // Character code ix = utf8_out(wc, word); word[ix] = 0; } else if(word[0] != '_') { // convert to lower case, and note if the word is all-capitals int c2; all_upper_case = 1; p = word; for(p=word;;) { // this assumes that the lower case char is the same length as the upper case char // OK, except for Turkish "I", but use towlower() rather than towlower2() ix = utf8_in(&c2,p); if(c2 == 0) break; if(iswupper(c2)) { utf8_out(towlower(c2),p); } else { all_upper_case = 0; } p += ix; } if(all_upper_case) { flag_codes[n_flag_codes++] = BITNUM_FLAG_ALLCAPS; } } len_word = strlen(word); if(transpose_offset > 0) { len_word = TransposeAlphabet(word, transpose_offset, transpose_min, transpose_max); } *hash = HashDictionary(word); len_phonetic = strlen(encoded_ph); dict_line[1] = len_word; // bit 6 indicates whether the word has been compressed len_word &= 0x3f; memcpy(&dict_line[2],word,len_word); if(len_phonetic == 0) { // no phonemes specified. set bit 7 dict_line[1] |= 0x80; length = len_word + 2; } else { length = len_word + len_phonetic + 3; strcpy(&dict_line[(len_word)+2],encoded_ph); } for(ix=0; ix<n_flag_codes; ix++) { dict_line[ix+length] = flag_codes[ix]; } length += n_flag_codes; if((multiple_string != NULL) && (multiple_words > 0)) { if(multiple_words > 10) { fprintf(f_log,"%5d: Two many parts in a multi-word entry: %d\n",linenum,multiple_words); } else { dict_line[length++] = 80 + multiple_words; ix = multiple_string_end - multiple_string; memcpy(&dict_line[length],multiple_string,ix); length += ix; } } dict_line[0] = length; #ifdef OPT_FORMAT spaces = 16; for(ix=0; ix<n_flag_codes; ix++) { if(flag_codes[ix] >= 100) { fprintf(f_log,"?%d ",flag_codes[ix]-100); spaces -= 3; } } fprintf(f_log,"%s",word); spaces -= strlen(word); DecodePhonemes(encoded_ph,decoded_ph); while(spaces-- > 0) fputc(' ',f_log); spaces += (14 - strlen(decoded_ph)); fprintf(f_log," %s",decoded_ph); while(spaces-- > 0) fputc(' ',f_log); for(ix=0; ix<n_flag_codes; ix++) { if(flag_codes[ix] < 100) fprintf(f_log," %s",lookup_mnem(mnem_flags,flag_codes[ix])); } if(comment != NULL) fprintf(f_log," %s",comment); else fputc('\n',f_log); #endif return(length); } /* end of compile_line */
static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) {//==================================================================== char *prule; unsigned char *p; int ix; int c; int gp; FILE *f_temp; int n_rules=0; int count=0; int different; const char *prev_rgroup_name; unsigned int char_code; int compile_mode=0; char *buf; char buf1[200]; char *rules[N_RULES]; int n_rgroups = 0; RGROUP rgroup[N_RULE_GROUP2]; linenum = 0; group_name[0] = 0; if((f_temp = fopen_log(fname_temp,"wb")) == NULL) return(1); for(;;) { linenum++; buf = fgets(buf1,sizeof(buf1),f_in); if(buf != NULL) { if((p = (unsigned char *)strstr(buf,"//")) != NULL) *p = 0; if(buf[0] == '\r') buf++; // ignore extra \r in \r\n } if((buf == NULL) || (buf[0] == '.')) { // next .group or end of file, write out the previous group if(n_rules > 0) { strcpy(rgroup[n_rgroups].name,group_name); rgroup[n_rgroups].start = ftell(f_temp); output_rule_group(f_temp,n_rules,rules,group_name); rgroup[n_rgroups].length = ftell(f_temp) - rgroup[n_rgroups].start; n_rgroups++; count += n_rules; } n_rules = 0; if(compile_mode == 2) { // end of the character replacements section fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacemenmt list compile_mode = 0; } if(buf == NULL) break; // end of file if(memcmp(buf,".L",2)==0) { compile_lettergroup(&buf[2], f_out); continue; } if(memcmp(buf,".replace",8)==0) { compile_mode = 2; fputc(RULE_GROUP_START,f_out); fputc(RULE_REPLACEMENTS,f_out); // advance to next word boundary while((ftell(f_out) & 3) != 0) fputc(0,f_out); } if(memcmp(buf,".group",6)==0) { compile_mode = 1; p = (unsigned char *)&buf[6]; while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! ix = 0; while((*p > ' ') && (ix < LEN_GROUP_NAME)) group_name[ix++] = *p++; group_name[ix]=0; if(sscanf(group_name,"0x%x",&char_code)==1) { // group character is given as a character code (max 16 bits) p = (unsigned char *)group_name; if(char_code > 0x100) { *p++ = (char_code >> 8); } *p++ = char_code; *p = 0; } if(strlen(group_name) > 2) { if(utf8_in(&c,group_name) < 2) { fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); error_count++; } group_name[2] = 0; } }