コード例 #1
0
ファイル: speech.c プロジェクト: rhdunn/espeak
espeak_ng_STATUS sync_espeak_Key(const char *key)
{
	// symbolic name, symbolicname_character  - is there a system resource of symbolic names per language?
	int letter;
	int ix;

	ix = utf8_in(&letter, key);
	if (key[ix] == 0) // a single character
		return sync_espeak_Char(letter);

	my_unique_identifier = 0;
	my_user_data = NULL;
	return Synthesize(0, key, 0); // speak key as a text string
}
コード例 #2
0
ファイル: espeak-ng.c プロジェクト: rhdunn/espeak
int main(int argc, char **argv)
{
	static struct option long_options[] = {
		{ "help",    no_argument,       0, 'h' },
		{ "stdin",   no_argument,       0, 0x100 },
		{ "compile-debug", optional_argument, 0, 0x101 },
		{ "compile", optional_argument, 0, 0x102 },
		{ "punct",   optional_argument, 0, 0x103 },
		{ "voices",  optional_argument, 0, 0x104 },
		{ "stdout",  no_argument,       0, 0x105 },
		{ "split",   optional_argument, 0, 0x106 },
		{ "path",    required_argument, 0, 0x107 },
		{ "phonout", required_argument, 0, 0x108 },
		{ "pho",     no_argument,       0, 0x109 },
		{ "ipa",     optional_argument, 0, 0x10a },
		{ "version", no_argument,       0, 0x10b },
		{ "sep",     optional_argument, 0, 0x10c },
		{ "tie",     optional_argument, 0, 0x10d },
		{ "compile-mbrola", optional_argument, 0, 0x10e },
		{ "compile-intonations", no_argument, 0, 0x10f },
		{ "compile-phonemes", optional_argument, 0, 0x110 },
		{ 0, 0, 0, 0 }
	};

	FILE *f_text = NULL;
	char *p_text = NULL;
	FILE *f_phonemes_out = stdout;
	char *data_path = NULL; // use default path for espeak-ng-data

	int option_index = 0;
	int c;
	int ix;
	char *optarg2;
	int value;
	int flag_stdin = 0;
	int flag_compile = 0;
	int filesize = 0;
	int synth_flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE;

	int volume = -1;
	int speed = -1;
	int pitch = -1;
	int wordgap = -1;
	int option_capitals = -1;
	int option_punctuation = -1;
	int phonemes_separator = 0;
	int phoneme_options = 0;
	int option_linelength = 0;
	int option_waveout = 0;

	espeak_VOICE voice_select;
	char filename[200];
	char voicename[40];
	char devicename[200];
	#define N_PUNCTLIST 100
	wchar_t option_punctlist[N_PUNCTLIST];

	voicename[0] = 0;
	wavefile[0] = 0;
	filename[0] = 0;
	devicename[0] = 0;
	option_punctlist[0] = 0;

	while (true) {
		c = getopt_long(argc, argv, "a:b:d:f:g:hk:l:mp:qs:v:w:xXz",
		                long_options, &option_index);

		// Detect the end of the options.
		if (c == -1)
			break;
		optarg2 = optarg;

		switch (c)
		{
		case 'b':
			// input character encoding, 8bit, 16bit, UTF8
			if ((sscanf(optarg2, "%d", &value) == 1) && (value <= 4))
				synth_flags |= value;
			else
				synth_flags |= espeakCHARS_8BIT;
			break;
		case 'd':
			strncpy0(devicename, optarg2, sizeof(devicename));
			break;
		case 'h':
			printf("\n");
			PrintVersion();
			printf("%s", help_text);
			return 0;
		case 'k':
			option_capitals = atoi(optarg2);
			break;
		case 'x':
			phoneme_options |= espeakPHONEMES_SHOW;
			break;
		case 'X':
			phoneme_options |= espeakPHONEMES_TRACE;
			break;
		case 'm':
			synth_flags |= espeakSSML;
			break;
		case 'p':
			pitch = atoi(optarg2);
			break;
		case 'q':
			quiet = true;
			break;
		case 'f':
			strncpy0(filename, optarg2, sizeof(filename));
			break;
		case 'l':
			option_linelength = atoi(optarg2);
			break;
		case 'a':
			volume = atoi(optarg2);
			break;
		case 's':
			speed = atoi(optarg2);
			break;
		case 'g':
			wordgap = atoi(optarg2);
			break;
		case 'v':
			strncpy0(voicename, optarg2, sizeof(voicename));
			break;
		case 'w':
			option_waveout = 1;
			strncpy0(wavefile, optarg2, sizeof(filename));
			break;
		case 'z': // remove pause from the end of a sentence
			synth_flags &= ~espeakENDPAUSE;
			break;
		case 0x100: // --stdin
			flag_stdin = 1;
			break;
		case 0x105: // --stdout
			option_waveout = 1;
			strcpy(wavefile, "stdout");
			break;
		case 0x101: // --compile-debug
		case 0x102: // --compile
			if (optarg2 != NULL && *optarg2) {
				strncpy0(voicename, optarg2, sizeof(voicename));
				flag_compile = c;
				quiet = true;
				break;
			} else {
				fprintf(stderr, "Voice name to '%s' not specified.\n", c == 0x101 ? "--compile-debug" : "--compile");
				exit(EXIT_FAILURE);
			}
		case 0x103: // --punct
			option_punctuation = 1;
			if (optarg2 != NULL) {
				ix = 0;
				while ((ix < N_PUNCTLIST) && ((option_punctlist[ix] = optarg2[ix]) != 0)) ix++;
				option_punctlist[N_PUNCTLIST-1] = 0;
				option_punctuation = 2;
			}
			break;
		case 0x104: // --voices
			espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, data_path, 0);
			DisplayVoices(stdout, optarg2);
			exit(0);
		case 0x106: // -- split
			if (optarg2 == NULL)
				samples_split_seconds = 30 * 60; // default 30 minutes
			else
				samples_split_seconds = atoi(optarg2) * 60;
			break;
		case 0x107: // --path
			data_path = optarg2;
			break;
		case 0x108: // --phonout
			if ((f_phonemes_out = fopen(optarg2, "w")) == NULL)
				fprintf(stderr, "Can't write to: %s\n", optarg2);
			break;
		case 0x109: // --pho
			phoneme_options |= espeakPHONEMES_MBROLA;
			break;
		case 0x10a: // --ipa
			phoneme_options |= espeakPHONEMES_IPA;
			if (optarg2 != NULL) {
				// deprecated and obsolete
				switch (atoi(optarg2))
				{
				case 1:
					phonemes_separator = '_';
					break;
				case 2:
					phonemes_separator = 0x0361;
					phoneme_options |= espeakPHONEMES_TIE;
					break;
				case 3:
					phonemes_separator = 0x200d; // ZWJ
					phoneme_options |= espeakPHONEMES_TIE;
					break;
				}

			}
			break;
		case 0x10b: // --version
			PrintVersion();
			exit(0);
		case 0x10c: // --sep
			phoneme_options |= espeakPHONEMES_SHOW;
			if (optarg2 == 0)
				phonemes_separator = ' ';
			else
				utf8_in(&phonemes_separator, optarg2);
			if (phonemes_separator == 'z')
				phonemes_separator = 0x200c; // ZWNJ
			break;
		case 0x10d: // --tie
			phoneme_options |= (espeakPHONEMES_SHOW | espeakPHONEMES_TIE);
			if (optarg2 == 0)
				phonemes_separator = 0x0361; // default: combining-double-inverted-breve
			else
				utf8_in(&phonemes_separator, optarg2);
			if (phonemes_separator == 'z')
				phonemes_separator = 0x200d; // ZWJ
			break;
		case 0x10e: // --compile-mbrola
		{
			espeak_ng_InitializePath(data_path);
			espeak_ng_ERROR_CONTEXT context = NULL;
			espeak_ng_STATUS result = espeak_ng_CompileMbrolaVoice(optarg2, stdout, &context);
			if (result != ENS_OK) {
				espeak_ng_PrintStatusCodeMessage(result, stderr, context);
				espeak_ng_ClearErrorContext(&context);
				return EXIT_FAILURE;
			}
			return EXIT_SUCCESS;
		}
		case 0x10f: // --compile-intonations
		{
			espeak_ng_InitializePath(data_path);
			espeak_ng_ERROR_CONTEXT context = NULL;
			espeak_ng_STATUS result = espeak_ng_CompileIntonation(stdout, &context);
			if (result != ENS_OK) {
				espeak_ng_PrintStatusCodeMessage(result, stderr, context);
				espeak_ng_ClearErrorContext(&context);
				return EXIT_FAILURE;
			}
			return EXIT_SUCCESS;
		}
		case 0x110: // --compile-phonemes
		{
			espeak_ng_InitializePath(data_path);
			espeak_ng_ERROR_CONTEXT context = NULL;
			espeak_ng_STATUS result;
			if (optarg2) {
				result = espeak_ng_CompilePhonemeDataPath(22050, optarg2, NULL, stdout, &context);
			} else {
				result = espeak_ng_CompilePhonemeData(22050, stdout, &context);
			}
			if (result != ENS_OK) {
				espeak_ng_PrintStatusCodeMessage(result, stderr, context);
				espeak_ng_ClearErrorContext(&context);
				return EXIT_FAILURE;
			}
			return EXIT_SUCCESS;
		}
		default:
			exit(0);
		}
	}

	espeak_ng_InitializePath(data_path);
	espeak_ng_ERROR_CONTEXT context = NULL;
	espeak_ng_STATUS result = espeak_ng_Initialize(&context);
	if (result != ENS_OK) {
		espeak_ng_PrintStatusCodeMessage(result, stderr, context);
		espeak_ng_ClearErrorContext(&context);
		exit(1);
	}

	if (option_waveout || quiet) {
		// writing to a file (or no output), we can use synchronous mode
		result = espeak_ng_InitializeOutput(ENOUTPUT_MODE_SYNCHRONOUS, 0, devicename[0] ? devicename : NULL);
		samplerate = espeak_ng_GetSampleRate();
		samples_split = samplerate * samples_split_seconds;

		espeak_SetSynthCallback(SynthCallback);
		if (samples_split) {
			char *extn;
			extn = strrchr(wavefile, '.');
			if ((extn != NULL) && ((wavefile + strlen(wavefile) - extn) <= 4)) {
				strcpy(filetype, extn);
				*extn = 0;
			}
		}
	} else {
		// play the sound output
		result = espeak_ng_InitializeOutput(PLAYBACK_MODE, 0, devicename[0] ? devicename : NULL);
		samplerate = espeak_ng_GetSampleRate();
	}

	if (result != ENS_OK) {
		espeak_ng_PrintStatusCodeMessage(result, stderr, NULL);
		exit(EXIT_FAILURE);
	}

	if (voicename[0] == 0)
		strcpy(voicename, ESPEAKNG_DEFAULT_VOICE);

	result = espeak_ng_SetVoiceByName(voicename);
	if (result != ENS_OK) {
		memset(&voice_select, 0, sizeof(voice_select));
		voice_select.languages = voicename;
		result = espeak_ng_SetVoiceByProperties(&voice_select);
		if (result != ENS_OK) {
			espeak_ng_PrintStatusCodeMessage(result, stderr, NULL);
			exit(EXIT_FAILURE);
		}
	}

	if (flag_compile) {
		// This must be done after the voice is set
		espeak_ng_ERROR_CONTEXT context = NULL;
		espeak_ng_STATUS result = espeak_ng_CompileDictionary("", NULL, stderr, flag_compile & 0x1, &context);
		if (result != ENS_OK) {
			espeak_ng_PrintStatusCodeMessage(result, stderr, context);
			espeak_ng_ClearErrorContext(&context);
			return EXIT_FAILURE;
		}
		return EXIT_SUCCESS;
	}

	// set any non-default values of parameters. This must be done after espeak_Initialize()
	if (speed > 0)
		espeak_SetParameter(espeakRATE, speed, 0);
	if (volume >= 0)
		espeak_SetParameter(espeakVOLUME, volume, 0);
	if (pitch >= 0)
		espeak_SetParameter(espeakPITCH, pitch, 0);
	if (option_capitals >= 0)
		espeak_SetParameter(espeakCAPITALS, option_capitals, 0);
	if (option_punctuation >= 0)
		espeak_SetParameter(espeakPUNCTUATION, option_punctuation, 0);
	if (wordgap >= 0)
		espeak_SetParameter(espeakWORDGAP, wordgap, 0);
	if (option_linelength > 0)
		espeak_SetParameter(espeakLINELENGTH, option_linelength, 0);
	if (option_punctuation == 2)
		espeak_SetPunctuationList(option_punctlist);

	espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out);

	if (filename[0] == 0) {
		if ((optind < argc) && (flag_stdin == 0)) {
			// there's a non-option parameter, and no -f or --stdin
			// use it as text
			p_text = argv[optind];
		} else {
			f_text = stdin;
			if (flag_stdin == 0)
				flag_stdin = 2;
		}
	} else {
		struct stat st;
		if (stat(filename, &st) != 0) {
			fprintf(stderr, "Failed to stat() file '%s'\n", filename);
			exit(EXIT_FAILURE);
		}
		filesize = GetFileLength(filename);
		f_text = fopen(filename, "r");
		if (f_text == NULL) {
			fprintf(stderr, "Failed to read file '%s'\n", filename);
			exit(EXIT_FAILURE);
		}
		if (S_ISFIFO(st.st_mode)) {
			flag_stdin = 2;
		}
	}

	if (p_text != NULL) {
		int size;
		size = strlen(p_text);
		espeak_Synth(p_text, size+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL);
	} else if (flag_stdin) {
		size_t max = 1000;
		if ((p_text = (char *)malloc(max)) == NULL) {
			espeak_ng_PrintStatusCodeMessage(ENOMEM, stderr, NULL);
			exit(EXIT_FAILURE);
		}

		if (flag_stdin == 2) {
			// line by line input on stdin or from FIFO
			while (fgets(p_text, max, f_text) != NULL) {
				p_text[max-1] = 0;
				espeak_Synth(p_text, max, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL);
				// Allow subprocesses to use the audio data through pipes.
				fflush(stdout);
			}
			if (f_text != stdin) {
				fclose(f_text);
			}
		} else {
			// bulk input on stdin
			ix = 0;
			while (true) {
				if ((c = fgetc(stdin)) == EOF)
					break;
				p_text[ix++] = (char)c;
				if (ix >= (max-1)) {
					char *new_text = NULL;
					if (max <= SIZE_MAX - 1000) {
						max += 1000;
						new_text = (char *)realloc(p_text, max);
					}
					if (new_text == NULL) {
						free(p_text);
						espeak_ng_PrintStatusCodeMessage(ENOMEM, stderr, NULL);
						exit(EXIT_FAILURE);
					}
					p_text = new_text;
				}
			}
			if (ix > 0) {
				p_text[ix-1] = 0;
				espeak_Synth(p_text, ix+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL);
			}
		}

		free(p_text);
	} else if (f_text != NULL) {
		if ((p_text = (char *)malloc(filesize+1)) == NULL) {
			espeak_ng_PrintStatusCodeMessage(ENOMEM, stderr, NULL);
			exit(EXIT_FAILURE);
		}

		fread(p_text, 1, filesize, f_text);
		p_text[filesize] = 0;
		espeak_Synth(p_text, filesize+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL);
		fclose(f_text);

		free(p_text);
	}

	result = espeak_ng_Synchronize();
	if (result != ENS_OK) {
		espeak_ng_PrintStatusCodeMessage(result, stderr, NULL);
		exit(EXIT_FAILURE);
	}

	if (f_phonemes_out != stdout)
		fclose(f_phonemes_out);

	CloseWavFile();
	espeak_ng_Terminate();
	return 0;
}
コード例 #3
0
ファイル: compiledict.cpp プロジェクト: HeryLong/booxsdk
static char *compile_rule(char *input)
{//===================================
	int ix;
	unsigned char c;
	int wc;
	char *p;
	char *prule;
	int len;
	int len_name;
	int state=2;
	int finish=0;
	int pre_bracket=0;
	char buf[80];
	char output[150];
	unsigned char bad_phoneme[4];

	buf[0]=0;
	rule_cond[0]=0;
	rule_pre[0]=0;
	rule_post[0]=0;
	rule_match[0]=0;
	rule_phonemes[0]=0;

	p = buf;
	
	for(ix=0; finish==0; ix++)
	{
		c = input[ix];

		switch(c = input[ix])
		{
		case ')':		// end of prefix section
			*p = 0;
			state = 1;
			pre_bracket = 1;
			copy_rule_string(buf,state);
			p = buf;
			break;
			
		case '(':		// start of suffix section
			*p = 0;
			state = 2;
			copy_rule_string(buf,state);
			state = 3;
			p = buf;
			break;
			
		case '\n':		// end of line
		case '\r':
		case 0:			// end of line
			*p = 0;
			copy_rule_string(buf,state);
			finish=1;
			break;
			
		case '\t':		// end of section section
		case ' ':
			*p = 0;
			copy_rule_string(buf,state);
			p = buf;
			break;
			
		case '?':
			if(state==2)
				state=0;
			else
				*p++ = c;
			break;

		default:
			*p++ = c;
			break;
		}
	}
	
	if(strcmp(rule_match,"$group")==0)
		strcpy(rule_match,group_name);

	if(rule_match[0]==0)
		return(NULL);

	EncodePhonemes(rule_phonemes,buf,bad_phoneme);
	for(ix=0;; ix++)
	{
		if((c = buf[ix])==0) break;
		if(c==255)
		{
			fprintf(f_log,"%5d: Bad phoneme [%c] in %s",linenum,bad_phoneme[0],input);
			error_count++;
			break;
		}
	}
	strcpy(output,buf);
	len = strlen(buf)+1;
	
	len_name = strlen(group_name);
	if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0))
	{
		utf8_in(&wc,rule_match);
		if((group_name[0] == '9') && IsDigit(wc))
		{
			// numeric group, rule_match starts with a digit, so OK
		}
		else
		{
			fprintf(f_log,"%5d: Wrong initial letters '%s' for group '%s'\n",linenum,rule_match,group_name);
			error_count++;
		}
	}
	strcpy(&output[len],rule_match);
	len += strlen(rule_match);

	if(debug_flag)
	{
		output[len] = RULE_LINENUM;
		output[len+1] = (linenum % 255) + 1;
		output[len+2] = (linenum / 255) + 1;
		len+=3;
	}

	if(rule_cond[0] != 0)
	{
		ix = -1;
		if(rule_cond[0] == '!')
		{
			// allow the rule only if the condition number is NOT set for the voice
			ix = atoi(&rule_cond[1]) + 32;
		}
		else
		{
			// allow the rule only if the condition number is set for the voice
			ix = atoi(rule_cond);
		}

		if((ix > 0) && (ix < 255))
		{
			output[len++] = RULE_CONDITION;
			output[len++] = ix;
		}
		else
		{
			fprintf(f_log,"%5d: bad condition number ?%d\n",linenum,ix);
			error_count++;
		}
	}
	if(rule_pre[0] != 0)
	{
		output[len++] = RULE_PRE;
		// output PRE string in reverse order
		for(ix = strlen(rule_pre)-1; ix>=0; ix--)
			output[len++] = rule_pre[ix];
	}

	if(rule_post[0] != 0)
	{
		sprintf(&output[len],"%c%s",RULE_POST,rule_post);
		len += (strlen(rule_post)+1);
	}
	output[len++]=0;
	prule = (char *)malloc(len);
	memcpy(prule,output,len);
	return(prule);
}  //  end of compile_rule
コード例 #4
0
ファイル: compiledict.cpp プロジェクト: HeryLong/booxsdk
static int compile_line(char *linebuf, char *dict_line, int *hash)
{//===============================================================
// Compile a line in the language_list file
	unsigned char  c;
	char *p;
	char *word;
	char *phonetic;
	unsigned int  ix;
	int  step;
	unsigned int  n_flag_codes = 0;
	int  flag_offset;
	int  length;
	int  multiple_words = 0;
	char *multiple_string = NULL;
	char *multiple_string_end = NULL;
	
	int len_word;
	int len_phonetic;
	int text_not_phonemes;   // this word specifies replacement text, not phonemes
	unsigned int  wc;
	int all_upper_case;
	
	char *mnemptr;
	char *comment;
	unsigned char flag_codes[100];
	char encoded_ph[200];
	unsigned char bad_phoneme[4];
static char nullstring[] = {0};

	comment = NULL;
	text_not_phonemes = 0;
	phonetic = word = nullstring;

if(memcmp(linebuf,"_-",2)==0)
{
step=1;  // TEST
}
	p = linebuf;
//	while(isspace2(*p)) p++;

#ifdef deleted
	if(*p == '$')
	{
		if(memcmp(p,"$textmode",9) == 0)
		{
			text_mode = 1;
			return(0);
		}
		if(memcmp(p,"$phonememode",12) == 0)
		{
			text_mode = 0;
			return(0);
		}
	}
#endif

	step = 0;
	
	c = 0;
	while(c != '\n')
	{
		c = *p;
	
		if((c == '?') && (step==0))
		{
			// conditional rule, allow only if the numbered condition is set for the voice
			flag_offset = 100;

			p++;
			if(*p == '!')
			{
				// allow only if the numbered condition is NOT set
				flag_offset = 132;
				p++;
			}

			ix = 0;
			if(isdigit(*p))
			{
				ix += (*p-'0');
				p++;
			}
			if(isdigit(*p))
			{
				ix = ix*10 + (*p-'0');
				p++;
			}
			flag_codes[n_flag_codes++] = ix + flag_offset;
			c = *p;
		}
		
		if((c == '$') && isalnum(p[1]))
		{
			/* read keyword parameter */
			mnemptr = p;
			while(!isspace2(c = *p)) p++;
			*p = 0;
	
			ix = LookupMnem(mnem_flags,mnemptr);
			if(ix > 0)
			{
				if(ix == 200)
				{
					text_mode = 1;
				}
				else
				if(ix == 201)
				{
					text_mode = 0;
				}
				else
				if(ix == BITNUM_FLAG_TEXTMODE)
				{
					text_not_phonemes = 1;
				}
				else
				{
					flag_codes[n_flag_codes++] = ix;
				}
			}
			else
			{
				fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr);
				error_count++;
			}
		}
	
		if((c == '/') && (p[1] == '/') && (multiple_words==0))
		{
			c = '\n';   /* "//" treat comment as end of line */
			comment = p;
		}
	
		switch(step)
		{
		case 0:
			if(c == '(')
			{
				multiple_words = 1;
				word = p+1;
				step = 1;
			}
			else
			if(!isspace2(c))
			{
				word = p;
				step = 1;
			}
			break;
	
		case 1:
			if((c == '-') && (word[0] != '_'))
			{
				flag_codes[n_flag_codes++] = BITNUM_FLAG_HYPHENATED;
				c = ' ';
			}
			if(isspace2(c))
			{
				p[0] = 0;   /* terminate english word */

				if(multiple_words)
				{
					multiple_string = multiple_string_end = p+1;
					step = 2;
				}
				else
				{
					step = 3;
				}
			}
			else
			if((c == ')') && multiple_words)
			{
				p[0] = 0;
				step = 3;
				multiple_words = 0;
			}
			break;

		case 2:
			if(isspace2(c))
			{
				multiple_words++;
			}
			else
			if(c == ')')
			{
				p[0] = ' ';   // terminate extra string
				multiple_string_end = p+1;
				step = 3;
			}
			break;
	
		case 3:
			if(!isspace2(c))
			{
				phonetic = p;
				step = 4;
			}
			break;
	
		case 4:
			if(isspace2(c))
			{
				p[0] = 0;   /* terminate phonetic */
				step = 5;
			}
			break;
	
		case 5:
			break;
		}
		p++;
	}
	
	if(word[0] == 0)
	{
#ifdef OPT_FORMAT
		if(comment != NULL)
			fprintf(f_log,"%s",comment);
		else
			fputc('\n',f_log);
#endif
		return(0);   /* blank line */
	}

	if(text_mode)
		text_not_phonemes = 1;

	if(text_not_phonemes != translator->langopts.textmode)
	{
		flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE;
	}

	if(text_not_phonemes)
	{
		// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word
		strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4);
	}
	else
	{
		EncodePhonemes(phonetic,encoded_ph,bad_phoneme);
		if(strchr(encoded_ph,phonSWITCH) != 0)
		{
			flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S;  // don't match on suffixes (except 's') when switching languages
		}

		// check for errors in the phonemes codes
		for(ix=0; ix<sizeof(encoded_ph); ix++)
		{
			c = encoded_ph[ix];
			if(c == 0)   break;
		
			if(c == 255)
			{
				/* unrecognised phoneme, report error */
				fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s  %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic);
				error_count++;
			}
		}
	}

	if(sscanf(word,"U+%x",&wc) == 1)
	{
		// Character code
		ix = utf8_out(wc, word);
		word[ix] = 0;
	}
	else
	if(word[0] != '_')
	{
		// convert to lower case, and note if the word is all-capitals
		int c2;

		all_upper_case = 1;
		p = word;
		for(p=word;;)
		{
			// this assumes that the lower case char is the same length as the upper case char
			// OK, except for Turkish "I", but use towlower() rather than towlower2()
			ix = utf8_in(&c2,p);
			if(c2 == 0)
				break;
			if(iswupper(c2))
			{
				utf8_out(towlower(c2),p);
			}
			else
			{
				all_upper_case = 0;
			}
			p += ix;
		}
		if(all_upper_case)
		{
			flag_codes[n_flag_codes++] = BITNUM_FLAG_ALLCAPS;
		}
	}

	len_word = strlen(word);

	if(transpose_offset > 0)
	{
		len_word = TransposeAlphabet(word, transpose_offset, transpose_min, transpose_max);
	}

	*hash = HashDictionary(word);
	len_phonetic = strlen(encoded_ph);
	
	dict_line[1] = len_word;   // bit 6 indicates whether the word has been compressed
	len_word &= 0x3f;

	memcpy(&dict_line[2],word,len_word);

	if(len_phonetic == 0)
	{
		// no phonemes specified. set bit 7
		dict_line[1] |= 0x80;
		length = len_word + 2;
	}
	else
	{
		length = len_word + len_phonetic + 3;
		strcpy(&dict_line[(len_word)+2],encoded_ph);
	}
	
	for(ix=0; ix<n_flag_codes; ix++)
	{
		dict_line[ix+length] = flag_codes[ix];
	}
	length += n_flag_codes;

	if((multiple_string != NULL) && (multiple_words > 0))
	{
		if(multiple_words > 10)
		{
			fprintf(f_log,"%5d: Two many parts in a multi-word entry: %d\n",linenum,multiple_words);
		}
		else
		{
			dict_line[length++] = 80 + multiple_words;
			ix = multiple_string_end - multiple_string;
			memcpy(&dict_line[length],multiple_string,ix);
			length += ix;
		}
	}
	dict_line[0] = length;

#ifdef OPT_FORMAT
	spaces = 16;
	for(ix=0; ix<n_flag_codes; ix++)
	{
		if(flag_codes[ix] >= 100)
		{
			fprintf(f_log,"?%d ",flag_codes[ix]-100);
			spaces -= 3;
		}
	}

	fprintf(f_log,"%s",word);
	spaces -= strlen(word);
	DecodePhonemes(encoded_ph,decoded_ph);
	while(spaces-- > 0) fputc(' ',f_log);
	spaces += (14 - strlen(decoded_ph));
	
	fprintf(f_log," %s",decoded_ph);
	while(spaces-- > 0) fputc(' ',f_log);
	for(ix=0; ix<n_flag_codes; ix++)
	{
		if(flag_codes[ix] < 100)
			fprintf(f_log," %s",lookup_mnem(mnem_flags,flag_codes[ix]));
	}
	if(comment != NULL)
		fprintf(f_log," %s",comment);
	else
		fputc('\n',f_log);
#endif

	return(length);
}  /* end of compile_line */
コード例 #5
0
ファイル: compiledict.cpp プロジェクト: HeryLong/booxsdk
static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
{//====================================================================
	char *prule;
	unsigned char *p;
	int ix;
	int c;
	int gp;
	FILE *f_temp;
	int n_rules=0;
	int count=0;
	int different;
	const char *prev_rgroup_name;
	unsigned int char_code;
	int compile_mode=0;
	char *buf;
	char buf1[200];
	char *rules[N_RULES];

	int n_rgroups = 0;
	RGROUP rgroup[N_RULE_GROUP2];
	
	linenum = 0;
	group_name[0] = 0;

	if((f_temp = fopen_log(fname_temp,"wb")) == NULL)
		return(1);

	for(;;)
	{
		linenum++;
		buf = fgets(buf1,sizeof(buf1),f_in);
		if(buf != NULL)
		{
			if((p = (unsigned char *)strstr(buf,"//")) != NULL)
				*p = 0;

			if(buf[0] == '\r') buf++;  // ignore extra \r in \r\n 
		}

		if((buf == NULL) || (buf[0] == '.'))
		{
			// next .group or end of file, write out the previous group

			if(n_rules > 0)
			{
				strcpy(rgroup[n_rgroups].name,group_name);
				rgroup[n_rgroups].start = ftell(f_temp);
				output_rule_group(f_temp,n_rules,rules,group_name);
				rgroup[n_rgroups].length = ftell(f_temp) - rgroup[n_rgroups].start;
				n_rgroups++;

				count += n_rules;
			}
			n_rules = 0;

			if(compile_mode == 2)
			{
				// end of the character replacements section
				fwrite(&n_rules,1,4,f_out);   // write a zero word to terminate the replacemenmt list
				compile_mode = 0;
			}

			if(buf == NULL) break;   // end of file

			if(memcmp(buf,".L",2)==0)
			{
				compile_lettergroup(&buf[2], f_out);
				continue;
			}

			if(memcmp(buf,".replace",8)==0)
			{
				compile_mode = 2;
				fputc(RULE_GROUP_START,f_out);
				fputc(RULE_REPLACEMENTS,f_out);

				// advance to next word boundary
				while((ftell(f_out) & 3) != 0)
					fputc(0,f_out);
			}

			if(memcmp(buf,".group",6)==0)
			{
				compile_mode = 1;

				p = (unsigned char *)&buf[6];
				while((p[0]==' ') || (p[0]=='\t')) p++;    // Note: Windows isspace(0xe1) gives TRUE !
				ix = 0;
				while((*p > ' ') && (ix < LEN_GROUP_NAME))
					group_name[ix++] = *p++;
				group_name[ix]=0;
	
				if(sscanf(group_name,"0x%x",&char_code)==1)
				{
					// group character is given as a character code (max 16 bits)
					p = (unsigned char *)group_name;
	
					if(char_code > 0x100)
					{
						*p++ = (char_code >> 8);
					}
					*p++ = char_code;
					*p = 0;
				}
	
				if(strlen(group_name) > 2)
				{
					if(utf8_in(&c,group_name) < 2)
					{
						fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
						error_count++;
					}
	
					group_name[2] = 0;
				}
			}