Example #1
0
static string
parse_identifier (string s, int &i) {
  int n= N(s), start= i;
  if (i<n && start_ident (s[i])) {
    decode_from_utf8 (s, i);
    while (i<n && continue_ident (s[i]))
      decode_from_utf8 (s, i);
  }
  return as_string (from_verbatim (s (start, i)));
}
Example #2
0
int
get_utf8_code (string c) {
  string uc= cork_to_utf8 (c);
  int pos= 0;
  int code= decode_from_utf8 (uc, pos);
  if (pos == N(uc)) return code;
  else return -1;
}
Example #3
0
int
str_length (string s, bool utf8) {
  if (utf8) {
    int i=0, r=0;
    while (i < N(s)) {
      decode_from_utf8 (s, i);
      r++;
    }
    return r;
  }
  else return N(s);
}
Example #4
0
void
goto_next_char (string s, int &i, bool utf8) {
  if (utf8) decode_from_utf8 (s, i);
  else if (i < N(s)) {
    if (s[i] == '<') {
      i++;
      while (i < N(s) && s[i] != '>') i++;
      if (i < N(s)) i++;
    }
    else i++;
  }
}
Example #5
0
string
get_unicode_range (string c) {
  string uc= cork_to_utf8 (c);
  int pos= 0;
  int code= decode_from_utf8 (uc, pos);
  string range= "";
  if (code <= 0x7f) range= "ascii";
  else if (code >= 0x80 && code <= 0x37f) range= "latin";
  else if (code >= 0x380 && code <= 0x3ff) range= "greek";
  else if (code >= 0x400 && code <= 0x4ff) range= "cyrillic";
  else if (code >= 0x3000 && code <= 0x303f) range= "cjk";
  else if (code >= 0x4e00 && code <= 0x9fcc) range= "cjk";
  else if (code >= 0xff00 && code <= 0xffef) range= "cjk";
  else if (code >= 0xac00 && code <= 0xd7af) range= "hangul";
  else if (code >= 0x2000 && code <= 0x23ff) range= "mathsymbols";
  else if (code >= 0x2900 && code <= 0x2e7f) range= "mathextra";
  else if (code >= 0x1d400 && code <= 0x1d7ff) range= "mathletters";
  if (pos == N(uc)) return range;
  return "";
}
Example #6
0
static tree
coqdoc_to_tree (string s) {
  bool newline= true;
  int i=0, n= N(s);
  tree coqdoc (DOCUMENT), line (CONCAT);
  if (starts (s, "(**")) {
    line << "(**";
    i+= 3;
  }
  while (i < n) {
    if (test (s, i, "[[\n")) {
      add_line (line, coqdoc);
      tree vernac=
        vernac_to_tree (parse_delimited (s, i, "[[\n", "\n]]", false));
      coqdoc << compound ("coqdoc-vernac", vernac);
      newline= true;
    }
    else if (s[i] == '[')
      line << compound ("coqdoc-coq",
          from_verbatim (parse_delimited (s, i, "[", "]", false)));
    else if (newline && (test (s, i, "**** ") || test (s, i, "*** ") ||
                         test (s, i, "** ")   || test (s, i, "* "))) {
      string header= "section";
      if (test (s, i, "** "))    header= "subsection";
      if (test (s, i, "*** "))   header= "subsubsection";
      if (test (s, i, "**** "))  header= "paragraph";
      while (i<n && s[i] == '*') i++;
      while (i<n && is_spacing (s[i])) i++;
      int start= i;
      while (i<n && (s[i] != '\n' && !test (s, i, "*)"))) i++;
      line << compound (header, coqdoc_to_tree (s (start, i)));
    }
    else if (newline && is_defining_pretty_printing (s, i)) {
      string str= parse_delimited (s, i, "(*", "*)", false);
      parse_pretty_printing_definition (str);
    }
    else if (newline && is_removing_pretty_printing (s, i)) {
      string str= parse_delimited (s, i, "(*", "*)", false);
      parse_pretty_printing_removal (str);
    }
    else if (test (s, i, "%%")) {
      line << "%";
      newline= false;
      i+= 2;
    }
    else if (test (s, i, "$$")) {
      line << "$";
      newline= false;
      i+= 2;
    }
    else if (test (s, i, "##")) {
      line << "#";
      newline= false;
      i+= 2;
    }
    else if (s[i] == '#' || s[i] == '%' || s[i] == '$') {
      newline= false;
      char delim= s[i];
      string ext= unescape_coqdoc (parse_delimited (s, i, delim));
      tree tm;
      if (delim == '#')
        tm= compound ("coqdoc-html", generic_to_tree (ext, "html-snippet"));
      else if (delim == '$')
        tm= compound ("coqdoc-latex",
              generic_to_tree ("$"*ext*"$", "latex-snippet"));
      else if (delim == '%')
        tm= compound ("coqdoc-latex", generic_to_tree (ext, "latex-snippet"));
      if (is_multi_paragraph (tm)) {
        add_line (line, coqdoc);
        coqdoc << tm;
      }
      else
        line << tm;
    }
    else if (is_list_begining (s, i)) {
      tree list= parse_list (s, i);
      add_line (line, coqdoc);
      coqdoc << list;
      newline= true;
    }
    else if (test (s, i, "\n<<")) {
      add_line (line, coqdoc);
      string parsed= parse_delimited (s, i, "\n<<", "\n>>", false);
      if (N(parsed) > 0 && parsed[0] == '\n')
        parsed= parsed(1, N(parsed));
      tree verb= verbatim_to_tree (parsed, false, "SourceCode");
      if (is_atomic (verb))
        verb= document (verb);
      coqdoc << compound ("coqdoc-verbatim", verb);
      newline= true;
    }
    else if (test (s, i, "<<")) {
      string parsed= parse_delimited (s, i, "<<", ">>", false);
      tree verb= verbatim_to_tree (parsed, true, "SourceCode");
      line << compound ("coqdoc-verbatim", verb);
    }
    else if (s[i] == '_' && (i == 0 || !start_ident(s[i-1]))) {
      line << coqdoc_parse_emphasis (s, i);
      newline= false;
    }
    else if (test (s, i, "----")) {
      i+=  4;
      add_line (line, coqdoc);
      coqdoc << compound ("hrule");
      while (i<n && s[i] == '-') i++;
      newline= true;
    }
    else if (s[i] == '\n') {
      add_line (line, coqdoc);
      i++;
      if (is_whiteline (s, i)) {
        coqdoc << "";
        do
          skip_whiteline (s, i);
        while (is_whiteline (s, i));
        i--;
      }
      newline= true;
    }
    else if (s[i] == '<') {
      line << "<less>";
      i++;
      newline= false;
    }
    else if (s[i] == '>') {
      line << "<gtr>";
      i++;
      newline= false;
    }
    else {
      if (!is_spacing (s[i]))
        newline= false;
      int start= i;
      decode_from_utf8 (s, i);
      line << from_verbatim (s(start, i));
    }
  }
  if (N(line) > 0)
    add_line (line, coqdoc);
  if (N(coqdoc) == 0)
    return "";
  else if (N(coqdoc) == 1)
    return coqdoc[0];
  else
    return coqdoc;
}