// Scan an identifier in s starting at position i.
// If s[i] satisfies start_ident, one UTF-8 sequence is consumed, followed by
// further sequences for as long as the next byte satisfies continue_ident.
// On return i sits just past the consumed text; the consumed slice (possibly
// empty) is passed through from_verbatim and as_string and returned.
static string
parse_identifier (string s, int &i) {
  const int first= i;
  const int len= N(s);
  const bool has_ident= (i < len) && start_ident (s[i]);
  if (has_ident) {
    do {
      decode_from_utf8 (s, i);
    } while (i < len && continue_ident (s[i]));
  }
  return as_string (from_verbatim (s (first, i)));
}
// Return the code obtained by decoding c (after cork_to_utf8 conversion) as a
// single UTF-8 sequence, or -1 when that one decoding step does not land
// exactly on the end of the converted string.
int
get_utf8_code (string c) {
  string utf8= cork_to_utf8 (c);
  int cursor= 0;
  int codepoint= decode_from_utf8 (utf8, cursor);
  // Accept the result only if the decoder stopped exactly at the end.
  return (cursor == N(utf8)) ? codepoint : -1;
}
// Length of s. With utf8 set, the result is the number of decode_from_utf8
// steps needed to walk the whole string; otherwise it is simply N(s).
int
str_length (string s, bool utf8) {
  if (!utf8) return N(s);
  int count= 0;
  for (int pos= 0; pos < N(s); count++)
    decode_from_utf8 (s, pos);  // advances pos past one encoded character
  return count;
}
// Advance i past the character of s that starts at position i.
//   utf8 == true : one UTF-8 sequence is consumed via decode_from_utf8.
//   utf8 == false: a '<' ... '>' group counts as a single character (the
//                  closing '>' is consumed when present; otherwise i stops at
//                  the end of s); any other byte advances i by one.
// When i is already at or beyond the end of s, nothing happens. The UTF-8
// branch used to call the decoder without this check, unlike the other
// branch, which handed it an index outside the string.
void
goto_next_char (string s, int &i, bool utf8) {
  if (i >= N(s)) return;
  if (utf8) {
    decode_from_utf8 (s, i);
    return;
  }
  if (s[i] != '<') {
    i++;
    return;
  }
  // Skip a whole "<...>" group.
  i++;
  while (i < N(s) && s[i] != '>') i++;
  if (i < N(s)) i++;
}
// Name the range that the single character c falls into. c is converted with
// cork_to_utf8 and decoded once; if that one step does not consume the whole
// converted string, or the code matches none of the ranges below, the empty
// string is returned.
string
get_unicode_range (string c) {
  string utf8= cork_to_utf8 (c);
  int cursor= 0;
  int cp= decode_from_utf8 (utf8, cursor);
  if (cursor != N(utf8)) return "";

  // Contiguous ranges starting from the bottom: each test only needs its
  // upper bound because the previous one already excluded smaller values.
  if (cp <= 0x7f) return "ascii";
  if (cp <= 0x37f) return "latin";
  if (cp <= 0x3ff) return "greek";
  if (cp <= 0x4ff) return "cyrillic";

  // Remaining ranges are disjoint, so the order of these tests is free.
  if (cp >= 0x3000 && cp <= 0x303f) return "cjk";
  if (cp >= 0x4e00 && cp <= 0x9fcc) return "cjk";
  if (cp >= 0xff00 && cp <= 0xffef) return "cjk";
  if (cp >= 0xac00 && cp <= 0xd7af) return "hangul";
  if (cp >= 0x2000 && cp <= 0x23ff) return "mathsymbols";
  if (cp >= 0x2900 && cp <= 0x2e7f) return "mathextra";
  if (cp >= 0x1d400 && cp <= 0x1d7ff) return "mathletters";
  return "";
}
// Convert the text s of a coqdoc comment into a tree.
// The text is scanned left to right. Inline material is appended to the
// current CONCAT line; block-level material first flushes that line into the
// DOCUMENT with add_line and is then appended to the DOCUMENT itself.
// The markup tests below are tried in a fixed order and the first match wins,
// so their order matters. Returns "" when the DOCUMENT ends up empty, its
// single child when it has exactly one, and the DOCUMENT itself otherwise.
static tree
coqdoc_to_tree (string s) {
  const int len= N(s);
  int pos= 0;
  bool at_bol= true;  // true while only spacing was seen on the current line
  tree doc (DOCUMENT), cur (CONCAT);

  // Keep a leading comment opener as literal text.
  if (starts (s, "(**")) {
    cur << "(**";
    pos+= 3;
  }

  while (pos < len) {
    // "[[\n" ... "\n]]" : block handed to vernac_to_tree
    if (test (s, pos, "[[\n")) {
      add_line (cur, doc);
      string code= parse_delimited (s, pos, "[[\n", "\n]]", false);
      tree vernac= vernac_to_tree (code);
      doc << compound ("coqdoc-vernac", vernac);
      at_bol= true;
      continue;
    }

    // "[" ... "]" : inline fragment
    if (s[pos] == '[') {
      string code= parse_delimited (s, pos, "[", "]", false);
      cur << compound ("coqdoc-coq", from_verbatim (code));
      continue;
    }

    // "* ", "** ", "*** ", "**** " at the start of a line : headers
    if (at_bol && (test (s, pos, "**** ") || test (s, pos, "*** ") ||
                   test (s, pos, "** ") || test (s, pos, "* "))) {
      string header;
      if (test (s, pos, "**** ")) header= "paragraph";
      else if (test (s, pos, "*** ")) header= "subsubsection";
      else if (test (s, pos, "** ")) header= "subsection";
      else header= "section";
      while (pos < len && s[pos] == '*') pos++;
      while (pos < len && is_spacing (s[pos])) pos++;
      // The title runs up to the end of the line or to a closing "*)".
      const int title_start= pos;
      while (pos < len && s[pos] != '\n' && !test (s, pos, "*)")) pos++;
      cur << compound (header, coqdoc_to_tree (s (title_start, pos)));
      continue;
    }

    // Pretty-printing definitions and removals produce no output here; the
    // delimited "(*" ... "*)" text is only passed on to the matching parser.
    if (at_bol && is_defining_pretty_printing (s, pos)) {
      string directive= parse_delimited (s, pos, "(*", "*)", false);
      parse_pretty_printing_definition (directive);
      continue;
    }
    if (at_bol && is_removing_pretty_printing (s, pos)) {
      string directive= parse_delimited (s, pos, "(*", "*)", false);
      parse_pretty_printing_removal (directive);
      continue;
    }

    // Doubled escape characters stand for one literal character.
    if (test (s, pos, "%%")) {
      cur << "%";
      at_bol= false;
      pos+= 2;
      continue;
    }
    if (test (s, pos, "$$")) {
      cur << "$";
      at_bol= false;
      pos+= 2;
      continue;
    }
    if (test (s, pos, "##")) {
      cur << "#";
      at_bol= false;
      pos+= 2;
      continue;
    }

    // Single '#', '%' or '$' : escaped HTML / LaTeX snippet
    if (s[pos] == '#' || s[pos] == '%' || s[pos] == '$') {
      at_bol= false;
      const char delim= s[pos];
      string ext= unescape_coqdoc (parse_delimited (s, pos, delim));
      tree snippet;
      switch (delim) {
      case '#':
        snippet= compound ("coqdoc-html",
                           generic_to_tree (ext, "html-snippet"));
        break;
      case '$':
        snippet= compound ("coqdoc-latex",
                           generic_to_tree ("$"*ext*"$", "latex-snippet"));
        break;
      case '%':
        snippet= compound ("coqdoc-latex",
                           generic_to_tree (ext, "latex-snippet"));
        break;
      }
      if (is_multi_paragraph (snippet)) {
        add_line (cur, doc);
        doc << snippet;
      }
      else {
        cur << snippet;
      }
      continue;
    }

    // Lists are block-level.
    if (is_list_begining (s, pos)) {
      tree list= parse_list (s, pos);
      add_line (cur, doc);
      doc << list;
      at_bol= true;
      continue;
    }

    // "\n<<" ... "\n>>" : block verbatim
    if (test (s, pos, "\n<<")) {
      add_line (cur, doc);
      string raw= parse_delimited (s, pos, "\n<<", "\n>>", false);
      // Drop one leading line break from the captured text.
      if (N(raw) > 0 && raw[0] == '\n') raw= raw (1, N(raw));
      tree verb= verbatim_to_tree (raw, false, "SourceCode");
      if (is_atomic (verb)) verb= document (verb);
      doc << compound ("coqdoc-verbatim", verb);
      at_bol= true;
      continue;
    }

    // "<<" ... ">>" : inline verbatim
    if (test (s, pos, "<<")) {
      string raw= parse_delimited (s, pos, "<<", ">>", false);
      tree verb= verbatim_to_tree (raw, true, "SourceCode");
      cur << compound ("coqdoc-verbatim", verb);
      continue;
    }

    // '_' not preceded by a character accepted by start_ident : emphasis
    if (s[pos] == '_' && (pos == 0 || !start_ident (s[pos-1]))) {
      cur << coqdoc_parse_emphasis (s, pos);
      at_bol= false;
      continue;
    }

    // Four or more dashes : horizontal rule
    if (test (s, pos, "----")) {
      pos+= 4;
      add_line (cur, doc);
      doc << compound ("hrule");
      while (pos < len && s[pos] == '-') pos++;
      at_bol= true;
      continue;
    }

    // Line break: flush the line; a run of white lines adds one empty entry.
    if (s[pos] == '\n') {
      add_line (cur, doc);
      pos++;
      if (is_whiteline (s, pos)) {
        doc << "";
        do {
          skip_whiteline (s, pos);
        } while (is_whiteline (s, pos));
        // NOTE(review): stepping back one position presumably lets the next
        // iteration see the line break that ended the blank run -- confirm
        // against skip_whiteline.
        pos--;
      }
      at_bol= true;
      continue;
    }

    // Angle brackets are emitted under their symbolic names.
    if (s[pos] == '<') {
      cur << "<less>";
      pos++;
      at_bol= false;
      continue;
    }
    if (s[pos] == '>') {
      cur << "<gtr>";
      pos++;
      at_bol= false;
      continue;
    }

    // Anything else: copy one UTF-8 sequence through from_verbatim.
    if (!is_spacing (s[pos])) at_bol= false;
    const int char_start= pos;
    decode_from_utf8 (s, pos);
    cur << from_verbatim (s (char_start, pos));
  }

  if (N(cur) > 0) add_line (cur, doc);

  switch (N(doc)) {
  case 0:
    return "";
  case 1:
    return doc[0];
  default:
    return doc;
  }
}