/*
 * next_nonspace
 * Returns a pointer to the first character of s for which isspace2()
 * reports false, or to the terminating NUL when no such character exists.
 */
char *
next_nonspace(char *s)
{
    char *cursor;

    for (cursor = s; *cursor != '\0'; cursor++) {
        if (!isspace2(*cursor))
            break;
    }
    return cursor;
}
/* * ParseDirective * Note: macro directives are handled before recording */ void ParseDirective(const char *cline) { char *line = strdup(strltrim(cline) + 1); // skip '.' and allow strtok //printf("'%s'\n", file->line); //printf("'%s'\n", line); bool valid_directive = false; //bool done_directive = true; //printf("Line: [%s]\n", cline); /* * macro ending directives */ if (DIRECTIVE("endm", PARSE_MACRO_DIRECTIVES)) { current_macro = 0; parse_directives = PARSE_ALL_DIRECTIVES; } else if (DIRECTIVE("endr", PARSE_REPT_DIRECTIVES)) { current_macro = 0; parse_directives = PARSE_ALL_DIRECTIVES; while (repeat-- > 0) MacroExecute("_rept"); } /* * record to macro */ if (current_macro) { MacroLine(cline); // record full line goto exit; // only process macro ending directives } /* * macro starting directives */ if (DIRECTIVE("macro", PARSE_MACRO_DIRECTIVES) || DIRECTIVE("macroicase", PARSE_MACRO_DIRECTIVES)) { char *name = strtok((char *)strskipspace(line), delim_chars); current_macro = FindMacro(name); if (pass != PASS_ASM) { if (current_macro) { eprintf("Macro name already defined.\n"); eexit(); } current_macro = NewMacro(name, DIRECTIVE("macroicase", PARSE_MACRO_DIRECTIVES)); EEKS{printf("new macro at %p\n", current_macro);} char *paramname; while ((paramname = strtok(0, delim_chars))) { if (isspace2(paramname[0])) paramname = strskipspace(paramname); if (strchr(endline_chars, *paramname)) break; current_macro->AddParameter(paramname); } } parse_directives = PARSE_MACRO_DIRECTIVES; }
static void copy_rule_string(char *string, int &state) {//=================================================== // state 0: conditional, 1=pre, 2=match, 3=post, 4=phonemes static char *outbuf[5] = {rule_cond, rule_pre, rule_match, rule_post, rule_phonemes}; static int next_state[5] = {2,2,4,4,4}; char *output; char *p; int ix; int len; char c; int sxflags; int value; int literal; if(string[0] == 0) return; output = outbuf[state]; if(state==4) { // append to any previous phoneme string, i.e. allow spaces in the phoneme string len = strlen(rule_phonemes); if(len > 0) rule_phonemes[len++] = ' '; output = &rule_phonemes[len]; } sxflags = 0x808000; // to ensure non-zero bytes for(p=string,ix=0;;) { literal = 0; c = *p++; if(c == '\\') { c = *p++; // treat next character literally if((c >= '0') && (c <= '3') && (p[0] >= '0') && (p[0] <= '7') && (p[1] >= '0') && (p[1] <= '7')) { // character code given by 3 digit octal value; c = (c-'0')*64 + (p[0]-'0')*8 + (p[1]-'0'); p += 2; } literal = 1; } if((state==1) || (state==3)) { // replace special characters (note: 'E' is reserved for a replaced silent 'e') if(literal == 0) { static const char lettergp_letters[9] = {LETTERGP_A,LETTERGP_B,LETTERGP_C,0,0,LETTERGP_F,LETTERGP_G,LETTERGP_H,LETTERGP_Y}; switch(c) { case '_': c = RULE_SPACE; break; case 'Y': c = 'I'; // drop through to next case case 'A': // vowel case 'B': case 'C': case 'H': case 'F': case 'G': if(state == 1) { // pre-rule, put the number before the RULE_LETTERGP; output[ix++] = lettergp_letters[c-'A'] + 'A'; c = RULE_LETTERGP; } else { output[ix++] = RULE_LETTERGP; c = lettergp_letters[c-'A'] + 'A'; } break; case 'D': c = RULE_DIGIT; break; case 'K': c = RULE_NOTVOWEL; break; case 'N': c = RULE_NO_SUFFIX; break; case 'V': c = RULE_IFVERB; break; case 'Z': c = RULE_NONALPHA; break; case '+': c = RULE_INC_SCORE; break; case '@': c = RULE_SYLLABLE; break; case '&': c = RULE_STRESSED; break; case '%': c = RULE_DOUBLE; break; case '#': c = RULE_DEL_FWD; break; case '!': 
c = RULE_CAPITAL; break; case 'T': c = RULE_ALT1; break; case 'W': c = RULE_SPELLING; break; case 'X': c = RULE_NOVOWELS; break; case 'L': // expect two digits c = *p++ - '0'; value = *p++ - '0'; c = c * 10 + value; if((value < 0) || (value > 9)) { c = 0; fprintf(f_log,"%5d: Expected 2 digits after 'L'\n",linenum); error_count++; } else if((c <= 0) || (c >= N_LETTER_GROUPS) || (letterGroupsDefined[(int)c] == 0)) { fprintf(f_log,"%5d: Letter group L%.2d not defined\n",linenum,c); error_count++; } c += 'A'; if(state == 1) { // pre-rule, put the group number before the RULE_LETTERGP command output[ix++] = c; c = RULE_LETTERGP2; } else { output[ix++] = RULE_LETTERGP2; } break; case '$': // obsolete, replaced by S fprintf(f_log,"%5d: $ now not allowed, use S for suffix",linenum); error_count++; break; case 'P': sxflags |= SUFX_P; // Prefix, now drop through to Suffix case 'S': output[ix++] = RULE_ENDING; value = 0; while(!isspace2(c = *p++) && (c != 0)) { switch(c) { case 'e': sxflags |= SUFX_E; break; case 'i': sxflags |= SUFX_I; break; case 'p': // obsolete, replaced by 'P' above sxflags |= SUFX_P; break; case 'v': sxflags |= SUFX_V; break; case 'd': sxflags |= SUFX_D; break; case 'f': sxflags |= SUFX_F; break; case 'q': sxflags |= SUFX_Q; break; case 't': sxflags |= SUFX_T; break; case 'b': sxflags |= SUFX_B; break; default: if(isdigit(c)) value = (value*10) + (c - '0'); break; } } p--; output[ix++] = sxflags >> 16; output[ix++] = sxflags >> 8; c = value | 0x80; break; } } } output[ix++] = c; if(c == 0) break; } state = next_state[state]; } // end of copy_rule_string
static int compile_line(char *linebuf, char *dict_line, int *hash) {//=============================================================== // Compile a line in the language_list file unsigned char c; char *p; char *word; char *phonetic; unsigned int ix; int step; unsigned int n_flag_codes = 0; int flag_offset; int length; int multiple_words = 0; char *multiple_string = NULL; char *multiple_string_end = NULL; int len_word; int len_phonetic; int text_not_phonemes; // this word specifies replacement text, not phonemes unsigned int wc; int all_upper_case; char *mnemptr; char *comment; unsigned char flag_codes[100]; char encoded_ph[200]; unsigned char bad_phoneme[4]; static char nullstring[] = {0}; comment = NULL; text_not_phonemes = 0; phonetic = word = nullstring; if(memcmp(linebuf,"_-",2)==0) { step=1; // TEST } p = linebuf; // while(isspace2(*p)) p++; #ifdef deleted if(*p == '$') { if(memcmp(p,"$textmode",9) == 0) { text_mode = 1; return(0); } if(memcmp(p,"$phonememode",12) == 0) { text_mode = 0; return(0); } } #endif step = 0; c = 0; while(c != '\n') { c = *p; if((c == '?') && (step==0)) { // conditional rule, allow only if the numbered condition is set for the voice flag_offset = 100; p++; if(*p == '!') { // allow only if the numbered condition is NOT set flag_offset = 132; p++; } ix = 0; if(isdigit(*p)) { ix += (*p-'0'); p++; } if(isdigit(*p)) { ix = ix*10 + (*p-'0'); p++; } flag_codes[n_flag_codes++] = ix + flag_offset; c = *p; } if((c == '$') && isalnum(p[1])) { /* read keyword parameter */ mnemptr = p; while(!isspace2(c = *p)) p++; *p = 0; ix = LookupMnem(mnem_flags,mnemptr); if(ix > 0) { if(ix == 200) { text_mode = 1; } else if(ix == 201) { text_mode = 0; } else if(ix == BITNUM_FLAG_TEXTMODE) { text_not_phonemes = 1; } else { flag_codes[n_flag_codes++] = ix; } } else { fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); error_count++; } } if((c == '/') && (p[1] == '/') && (multiple_words==0)) { c = '\n'; /* "//" treat comment as end of line */ comment = 
p; } switch(step) { case 0: if(c == '(') { multiple_words = 1; word = p+1; step = 1; } else if(!isspace2(c)) { word = p; step = 1; } break; case 1: if((c == '-') && (word[0] != '_')) { flag_codes[n_flag_codes++] = BITNUM_FLAG_HYPHENATED; c = ' '; } if(isspace2(c)) { p[0] = 0; /* terminate english word */ if(multiple_words) { multiple_string = multiple_string_end = p+1; step = 2; } else { step = 3; } } else if((c == ')') && multiple_words) { p[0] = 0; step = 3; multiple_words = 0; } break; case 2: if(isspace2(c)) { multiple_words++; } else if(c == ')') { p[0] = ' '; // terminate extra string multiple_string_end = p+1; step = 3; } break; case 3: if(!isspace2(c)) { phonetic = p; step = 4; } break; case 4: if(isspace2(c)) { p[0] = 0; /* terminate phonetic */ step = 5; } break; case 5: break; } p++; } if(word[0] == 0) { #ifdef OPT_FORMAT if(comment != NULL) fprintf(f_log,"%s",comment); else fputc('\n',f_log); #endif return(0); /* blank line */ } if(text_mode) text_not_phonemes = 1; if(text_not_phonemes != translator->langopts.textmode) { flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; } if(text_not_phonemes) { // this is replacement text, so don't encode as phonemes. 
Restrict the length of the replacement word strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); } else { EncodePhonemes(phonetic,encoded_ph,bad_phoneme); if(strchr(encoded_ph,phonSWITCH) != 0) { flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages } // check for errors in the phonemes codes for(ix=0; ix<sizeof(encoded_ph); ix++) { c = encoded_ph[ix]; if(c == 0) break; if(c == 255) { /* unrecognised phoneme, report error */ fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); error_count++; } } } if(sscanf(word,"U+%x",&wc) == 1) { // Character code ix = utf8_out(wc, word); word[ix] = 0; } else if(word[0] != '_') { // convert to lower case, and note if the word is all-capitals int c2; all_upper_case = 1; p = word; for(p=word;;) { // this assumes that the lower case char is the same length as the upper case char // OK, except for Turkish "I", but use towlower() rather than towlower2() ix = utf8_in(&c2,p); if(c2 == 0) break; if(iswupper(c2)) { utf8_out(towlower(c2),p); } else { all_upper_case = 0; } p += ix; } if(all_upper_case) { flag_codes[n_flag_codes++] = BITNUM_FLAG_ALLCAPS; } } len_word = strlen(word); if(transpose_offset > 0) { len_word = TransposeAlphabet(word, transpose_offset, transpose_min, transpose_max); } *hash = HashDictionary(word); len_phonetic = strlen(encoded_ph); dict_line[1] = len_word; // bit 6 indicates whether the word has been compressed len_word &= 0x3f; memcpy(&dict_line[2],word,len_word); if(len_phonetic == 0) { // no phonemes specified. 
set bit 7 dict_line[1] |= 0x80; length = len_word + 2; } else { length = len_word + len_phonetic + 3; strcpy(&dict_line[(len_word)+2],encoded_ph); } for(ix=0; ix<n_flag_codes; ix++) { dict_line[ix+length] = flag_codes[ix]; } length += n_flag_codes; if((multiple_string != NULL) && (multiple_words > 0)) { if(multiple_words > 10) { fprintf(f_log,"%5d: Two many parts in a multi-word entry: %d\n",linenum,multiple_words); } else { dict_line[length++] = 80 + multiple_words; ix = multiple_string_end - multiple_string; memcpy(&dict_line[length],multiple_string,ix); length += ix; } } dict_line[0] = length; #ifdef OPT_FORMAT spaces = 16; for(ix=0; ix<n_flag_codes; ix++) { if(flag_codes[ix] >= 100) { fprintf(f_log,"?%d ",flag_codes[ix]-100); spaces -= 3; } } fprintf(f_log,"%s",word); spaces -= strlen(word); DecodePhonemes(encoded_ph,decoded_ph); while(spaces-- > 0) fputc(' ',f_log); spaces += (14 - strlen(decoded_ph)); fprintf(f_log," %s",decoded_ph); while(spaces-- > 0) fputc(' ',f_log); for(ix=0; ix<n_flag_codes; ix++) { if(flag_codes[ix] < 100) fprintf(f_log," %s",lookup_mnem(mnem_flags,flag_codes[ix])); } if(comment != NULL) fprintf(f_log," %s",comment); else fputc('\n',f_log); #endif return(length); } /* end of compile_line */
static int compile_lettergroup(char *input, FILE *f_out) {//===================================================== char *p; char *p_start; int group; int ix; int n_items; int length; int max_length = 0; #define N_LETTERGP_ITEMS 200 char *items[N_LETTERGP_ITEMS]; char item_length[N_LETTERGP_ITEMS]; p = input; if(!isdigit(p[0]) || !isdigit(p[1])) { fprintf(f_log,"%5d: Expected 2 digits after '.L'\n",linenum); error_count++; return(1); } group = atoi(&p[0]); if(group >= N_LETTER_GROUPS) { fprintf(f_log,"%5d: lettergroup out of range (01-%.2d)\n",linenum,N_LETTER_GROUPS-1); error_count++; return(1); } while(!isspace2(*p)) p++; fputc(RULE_GROUP_START,f_out); fputc(RULE_LETTERGP2,f_out); fputc(group + 'A', f_out); letterGroupsDefined[group] = 1; n_items = 0; while(n_items < N_LETTERGP_ITEMS) { while(isspace2(*p)) p++; if(*p == 0) break; items[n_items] = p_start = p; while((*p & 0xff) > ' ') { p++; } *p++ = 0; length = p - p_start; if(length > max_length) max_length = length; item_length[n_items++] = length; } // write out the items, longest first while(max_length > 1) { for(ix=0; ix < n_items; ix++) { if(item_length[ix] == max_length) { fwrite(items[ix],1,max_length,f_out); } } max_length--; } fputc(RULE_GROUP_END,f_out); return(0); }
/*
 * Render_String
 * Writes text into the rendering context's line buffer, wrapping words at the
 * right margin, and converts the start/end values of cx->inline_links from
 * offsets within text to absolute positions (line * width + column).
 * Afterwards the inline links are appended to cx->links and
 * cx->inline_links is cleared.
 *
 * Nothing is done when the usable width is below 8 columns or text is NULL.
 * The bytes ('\n' | 128) and (' ' | 128) are treated as a forced line feed
 * and a plain space respectively.
 */
static void Render_String(document_rendering_context_t *cx, char *text)
{
    document_rendered_link_t *lnk;
    document_rendered_link_t *tail;
    char *cur = text;
    signed char ch;
    int word_len;
    int offset;
    int usable_width = cx->width - cx->l_margin - cx->r_margin;

    /* too narrow to render anything sensible */
    if (usable_width < 8)
        return;
    if (cur == NULL)
        return;

    /* negate the link offsets to mark them as "not yet converted" */
    for (lnk = cx->inline_links; lnk; lnk = lnk->next) {
        lnk->start = -lnk->start;
        lnk->end = -lnk->end;
    }

    for (;;) {
        ch = *cur;
        if (!ch)
            break;

        /* measure the word that starts here (capped at the usable width) */
        word_len = 0;
        while (word_len < usable_width) {
            if (isspace2(cur[word_len]) || cur[word_len] == '\n' + (char)128 || !cur[word_len])
                break;
            word_len++;
        }

        /* wrap before a word that fits on a line but not on the rest of this one */
        if (word_len != usable_width && (cx->line_pos + word_len > cx->width - cx->r_margin))
            LineFeed(cx);

        /* turn link offsets that have been reached into absolute positions */
        offset = cur - text;
        for (lnk = cx->inline_links; lnk; lnk = lnk->next) {
            if (lnk->start <= 0 && offset >= -lnk->start)
                lnk->start = cx->line * cx->width + cx->line_pos;
            if (lnk->end <= 0 && offset >= -lnk->end)
                lnk->end = cx->line * cx->width + cx->line_pos;
        }

        cur++;

        if (ch == (signed char)('\n' | 128)) {
            LineFeed(cx);
        } else if (ch == (signed char)(' ' | 128)) {
            cx->line_buf[cx->line_pos++] = ' ';
        } else {
            cx->line_buf[cx->line_pos++] = ch;
        }

        if (cx->line_pos >= cx->width - cx->r_margin) {
            LineFeed(cx);
            /* the break was forced by lack of room, so drop the spaces that follow */
            while (isspace2(*cur))
                cur++;
        }
    }

    LineFeed(cx);

    /* hand the inline links over to the document's link list */
    if (cx->links == NULL) {
        cx->links = cx->inline_links;
    } else {
        for (tail = cx->links; tail->next; tail = tail->next)
            ;
        tail->next = cx->inline_links;
    }
    cx->inline_links = NULL;
}