static void unicode_subst (int src, int dest, int nr, string fn) { for (int i=0; i<nr; i++) { string csrc = upcase_all ("<#" * as_hexadecimal (src + i) * ">"); string cdest= upcase_all ("<#" * as_hexadecimal (dest + i) * ">"); if (dest + i < 128) cdest= string ((char) (dest + i)); substitution_char (csrc)= cdest; substitution_font (csrc)= fn; csrc= locase_all (csrc); substitution_char (csrc)= cdest; substitution_font (csrc)= fn; csrc= rewrite_math (csrc); substitution_char (csrc)= cdest; substitution_font (csrc)= fn; } }
/**
 * Return `s2` converted to UTF-8 when a usable source encoding is found.
 *
 * The parser input `s` is reset to `s2`.  If the input starts with "<?" and
 * the following name is "xml", the attributes up to "?>" are scanned for
 * `encoding`; its value is upper-cased and used as the source encoding.
 *
 * - Encoding declared: return convert (s2, encoding, "UTF-8") unless that
 *   result is empty, in which case `s2` is returned unchanged.
 * - No encoding declared: if check_encoding accepts `s2` as UTF-8, return it
 *   unchanged; otherwise try a conversion from ISO-8859-1 and return the
 *   result when it is non-empty, else `s2`.
 *
 * Side effect: `s` is left wherever the prolog scan stopped.
 */
string
xml_html_parser::transcode (string s2) {
  s= parse_string (s2);

  string encoding;
  if (test (s, "<?")) {
    s += 2;
    string target= parse_name ();
    skip_space ();
    if (target == "xml") {
      // since html==true implies we can accept horribly broken HTML, the
      // presence of an XML prolog is not enough to clear the flag.
      /* html= false; */
      while (s && !test (s, "?>")) {
        string attname= parse_name ();
        skip_space ();
        // An attribute without '=' ends the scan.
        if (!test (s, "=")) break;
        s += 1;
        skip_space ();

        // The value stays empty unless it is quoted with " or '.
        string val;
        bool double_quoted= test (s, "\"");
        bool single_quoted= !double_quoted && test (s, "'");
        if (double_quoted || single_quoted) {
          s += 1;
          if (double_quoted) val= parse_until ("\"");
          else val= parse_until ("'");
          skip_space ();
        }

        if (attname == "encoding") {
          encoding= upcase_all (val);
          break;
        }
      }
    }
  }

  if (N(encoding) != 0) {
    // cout << "encoding was specified\n" ;
    string converted= convert (s2, encoding, "UTF-8");
    if (N(converted) != 0) return converted;
    /* conversion from specified charset failed, do nothing (and pray) */
    return s2;
  }

  // cout << "guess encoding\n" ;
  /* input encoding seems to be utf-8, do nothing */
  if (check_encoding (s2, "UTF-8")) return s2;

  string converted= convert (s2, "ISO-8859-1", "UTF-8");
  if (N(converted) != 0) return converted;
  return s2;
}
/**
 * Tell whether `c` is one of the lower-case Greek letters known to the table.
 *
 * The lookup table is built on the first call (while it is still empty) and
 * holds three spellings of every code point in [0x3b1, 0x3c9): the
 * upper-cased "<#HEX>" form, its lower-cased form, and rewrite_math applied
 * to the upper-cased form.  Any other string yields the table default, false.
 *
 * The upper-case range (0x391 up to but excluding 0x3a9, skipping 0x3a2) is
 * deliberately not registered; it was disabled in the original code.
 *
 * NOTE(review): the upper bound is exclusive, so 0x3c9 itself is not in the
 * table -- confirm that leaving it out is intended.
 */
static bool
is_greek (string c) {
  static hashmap<string,bool> greek (false);
  if (N(greek) == 0) {
    for (int code= 0x3b1; code < 0x3c9; code++) {
      string upper= upcase_all ("<#" * as_hexadecimal (code) * ">");
      greek (upper)= true;
      greek (locase_all (upper))= true;
      greek (rewrite_math (upper))= true;
    }
  }
  return greek[c];
}
int smart_font_rep::resolve (string c, string fam, int attempt) { //cout << "Resolve " << c << " in " << fam << ", attempt " << attempt << "\n"; array<string> a= trimmed_tokenize (fam, "="); if (N(a) >= 2) { array<string> given= logical_font (family, variant, series, rshape); fam= a[1]; array<string> b= tokenize (a[0], " "); for (int i=0; i<N(b); i++) { if (b[i] == "") continue; bool ok= false; array<string> v= tokenize (b[i], "|"); for (int j=0; j<N(v); j++) { string wanted= locase_all (v[j]); if (wanted == "") ok= true; else if (contains (wanted, given)) ok= true; else if (wanted == get_unicode_range (c)) ok= true; else if (wanted == substitute_math_letter (c, 2)) ok= true; else if (wanted == c) ok= true; else { array<string> w= tokenize (v[j], ":"); if (N(w) == 1) w << w[0]; if (N(w) == 2) { int code = get_utf8_code (c); int start= get_utf8_code (w[0]); int end = get_utf8_code (w[1]); if (code != -1 && code >= start && code <= end) ok= true; } } } if (!ok) return -1; } } if (attempt == 1) { bool ok= true; if (fam == "cal" || fam == "cal*" || fam == "Bbb" || fam == "Bbb****") ok= ok && is_alpha (c) && upcase_all (c) == c; if (fam == "cal**" || fam == "Bbb*") ok= ok && is_alpha (c); if (!ok) return -1; if (fam == mfam) { if (fn[SUBFONT_MAIN]->supports (c)) return sm->add_char (tuple ("main"), c); } else { font cfn= closest_font (fam, variant, series, rshape, sz, dpi, 1); if (cfn->supports (c)) { tree key= tuple (fam, variant, series, rshape, "1"); return sm->add_char (key, c); } } if (is_math_family (fam)) { tree key= tuple ("math", fam, variant, series, rshape); int nr= sm->add_font (key, REWRITE_MATH); initialize_font (nr); if (fn[nr]->supports (rewrite (c, REWRITE_MATH))) return sm->add_char (key, c); } if (fam == "roman" && N(c) > 1) { tree key= tuple ("cyrillic", fam, variant, series, rshape); int nr= sm->add_font (key, REWRITE_CYRILLIC); initialize_font (nr); if (fn[nr]->supports (rewrite (c, REWRITE_CYRILLIC))) return sm->add_char (key, c); } } if (attempt 
> 1) { string range= get_unicode_range (c); if (range != "") { int a= attempt - 1; string v= variant; if (v == "rm") v= range; else v= v * "-" * range; font cfn= closest_font (fam, v, series, rshape, sz, dpi, a); //cout << "Trying " << c << " in " << cfn->res_name << "\n"; if (cfn->supports (c)) { tree key= tuple (fam, v, series, rshape, as_string (a)); return sm->add_char (key, c); } } } return -1; }
static void translit_set (int i, string s) { string h= as_hexadecimal (i); translit_table ("<#" * locase_all (h) * ">")= s; translit_table ("<#" * upcase_all (h) * ">")= s; }