static void
unicode_subst (int src, int dest, int nr, string fn) {
  for (int i=0; i<nr; i++) {
    string csrc = upcase_all ("<#" * as_hexadecimal (src  + i) * ">");
    string cdest= upcase_all ("<#" * as_hexadecimal (dest + i) * ">");
    if (dest + i < 128) cdest= string ((char) (dest + i));
    substitution_char (csrc)= cdest;
    substitution_font (csrc)= fn;
    csrc= locase_all (csrc);
    substitution_char (csrc)= cdest;
    substitution_font (csrc)= fn;
    csrc= rewrite_math (csrc);
    substitution_char (csrc)= cdest;
    substitution_font (csrc)= fn;
  }
}
Beispiel #2
0
string
xml_html_parser::transcode (string s2) {
  s= parse_string (s2);

  string encoding;
  if (test (s, "<?")) {
    s += 2;
    string target= parse_name ();
    skip_space ();
    if (target == "xml") {
      // since html==true implies we can accept horribly broken HTML, the
      // presence of an XML prolog is not enough to clear the flag.
      /* html= false; */
      while (s && !test (s, "?>")) {
	string attname= parse_name ();
	skip_space ();
	if (!test (s, "=")) break;
	s += 1;
	skip_space ();
	string val;
	if (test (s, "\"")) {
	  s += 1;
	  val= parse_until ("\"");
	  skip_space ();	  
	}
	else if (test (s, "'")) {
	  s += 1;
	  val= parse_until ("'");
	  skip_space ();
	}
	if (attname == "encoding") {
	  encoding= upcase_all (val);
	  break;
	}
      }
    }
  }

  if (N(encoding) != 0) {
    // cout << "encoding was specified\n" ;
    string s3= convert (s2, encoding, "UTF-8");
    if (N(s3) == 0)
      /* conversion from specified charset failed, do nothing (and pray) */ ;
    else return s3;
  }
  else {
    // cout << "guess encoding\n" ;
    if (check_encoding (s2, "UTF-8"))
      /* input encoding seems to be utf-8, do nothing */ ;
    else {
      string s3= convert (s2, "ISO-8859-1", "UTF-8");
      if (N(s3) != 0) return s3;
    }
  }

  return s2;
}
static bool
is_greek (string c) {
  static hashmap<string,bool> t (false);
  if (N(t) == 0) {
    array<int> a;
    //for (int i= 0x391; i<0x3a9; i++) if (i != 0x3a2) a << i;
    for (int i= 0x3b1; i<0x3c9; i++) a << i;
    for (int i= 0; i<N(a); i++) {
      string s= upcase_all ("<#" * as_hexadecimal (a[i]) * ">");
      t (s)= true;
      t (locase_all (s))= true;
      t (rewrite_math (s))= true;
    }
  }
  return t[c];
}
int
smart_font_rep::resolve (string c, string fam, int attempt) { 
  //cout << "Resolve " << c << " in " << fam << ", attempt " << attempt << "\n";
  array<string> a= trimmed_tokenize (fam, "=");
  if (N(a) >= 2) {
    array<string> given= logical_font (family, variant, series, rshape);
    fam= a[1];
    array<string> b= tokenize (a[0], " ");
    for (int i=0; i<N(b); i++) {
      if (b[i] == "") continue;
      bool ok= false;
      array<string> v= tokenize (b[i], "|");
      for (int j=0; j<N(v); j++) {
        string wanted= locase_all (v[j]);
        if (wanted == "") ok= true;
        else if (contains (wanted, given)) ok= true;
        else if (wanted == get_unicode_range (c)) ok= true;
        else if (wanted == substitute_math_letter (c, 2)) ok= true;
        else if (wanted == c) ok= true;
        else {
          array<string> w= tokenize (v[j], ":");
          if (N(w) == 1) w << w[0];
          if (N(w) == 2) {
            int code = get_utf8_code (c);
            int start= get_utf8_code (w[0]);
            int end  = get_utf8_code (w[1]);
            if (code != -1 && code >= start && code <= end) ok= true;
          }
        }
      }
      if (!ok) return -1;
    }
  }

  if (attempt == 1) {
    bool ok= true;
    if (fam == "cal" || fam == "cal*" ||
        fam == "Bbb" || fam == "Bbb****")
      ok= ok && is_alpha (c) && upcase_all (c) == c;
    if (fam == "cal**" || fam == "Bbb*")
      ok= ok && is_alpha (c);
    if (!ok) return -1;

    if (fam == mfam) {
      if (fn[SUBFONT_MAIN]->supports (c))
        return sm->add_char (tuple ("main"), c);
    }
    else {
      font cfn= closest_font (fam, variant, series, rshape, sz, dpi, 1);
      if (cfn->supports (c)) {
        tree key= tuple (fam, variant, series, rshape, "1");
        return sm->add_char (key, c);
      }
    }

    if (is_math_family (fam)) {
      tree key= tuple ("math", fam, variant, series, rshape);
      int nr= sm->add_font (key, REWRITE_MATH);
      initialize_font (nr);
      if (fn[nr]->supports (rewrite (c, REWRITE_MATH)))
        return sm->add_char (key, c);
    }
    if (fam == "roman" && N(c) > 1) {
      tree key= tuple ("cyrillic", fam, variant, series, rshape);
      int nr= sm->add_font (key, REWRITE_CYRILLIC);
      initialize_font (nr);
      if (fn[nr]->supports (rewrite (c, REWRITE_CYRILLIC)))
        return sm->add_char (key, c);
    }
  }

  if (attempt > 1) {
    string range= get_unicode_range (c);
    if (range != "") {
      int a= attempt - 1;
      string v= variant;
      if (v == "rm") v= range;
      else v= v * "-" * range;
      font cfn= closest_font (fam, v, series, rshape, sz, dpi, a);
      //cout << "Trying " << c << " in " << cfn->res_name << "\n";
      if (cfn->supports (c)) {
        tree key= tuple (fam, v, series, rshape, as_string (a));
        return sm->add_char (key, c);
      }
    }
  }

  return -1;
}
Beispiel #5
0
static void
translit_set (int i, string s) {
  string h= as_hexadecimal (i);
  translit_table ("<#" * locase_all (h) * ">")= s;
  translit_table ("<#" * upcase_all (h) * ">")= s;
}