int UArray_isLegalUTF8(const UArray *self) { uint8_t* sourceStart = self->data; uint8_t* sourceEnd = self->data + self->size * self->itemSize; return isLegalUTF8Sequence(sourceStart, sourceEnd); }
*/ REBFLG Legal_UTF8_Char(const REBYTE *str, REBCNT len) /* ** Returns TRUE if char is legal. ** ***********************************************************************/ { return isLegalUTF8Sequence(str, str + len); }
static unsigned findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { UTF8 b1, b2, b3; assert(!isLegalUTF8Sequence(source, sourceEnd)); /* * Unicode 6.3.0, D93b: * * Maximal subpart of an ill-formed subsequence: The longest code unit * subsequence starting at an unconvertible offset that is either: * a. the initial subsequence of a well-formed code unit sequence, or * b. a subsequence of length one. */ if (source == sourceEnd) return 0; /* * Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8 * Byte Sequences. */ b1 = *source; ++source; if (b1 >= 0xC2 && b1 <= 0xDF) { /* * First byte is valid, but we know that this code unit sequence is * invalid, so the maximal subpart has to end after the first byte. */ return 1; } if (source == sourceEnd) return 1; b2 = *source; ++source; if (b1 == 0xE0) { return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1; } if (b1 >= 0xE1 && b1 <= 0xEC) { return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; } if (b1 == 0xED) { return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1; } if (b1 >= 0xEE && b1 <= 0xEF) { return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; } if (b1 == 0xF0) { if (b2 >= 0x90 && b2 <= 0xBF) { if (source == sourceEnd) return 2; b3 = *source; return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; } return 1; } if (b1 >= 0xF1 && b1 <= 0xF3) { if (b2 >= 0x80 && b2 <= 0xBF) { if (source == sourceEnd) return 2; b3 = *source; return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; } return 1; } if (b1 == 0xF4) { if (b2 >= 0x80 && b2 <= 0x8F) { if (source == sourceEnd) return 2; b3 = *source; return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; } return 1; } assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5); /* * There are no valid sequences that start with these bytes. Maximal subpart * is defined to have length 1 in these cases. */ return 1; }
// // Legal_UTF8_Char: C // // Returns TRUE if char is legal. // REBOOL Legal_UTF8_Char(const REBYTE *str, REBCNT len) { return LOGICAL(isLegalUTF8Sequence(str, str + len)); }
char *text2html(unsigned char *buffer, int buffer_len, int flags, char *fonttag) { unsigned char *saved_buffer = buffer; string str; if (string_initialize(&str,1024)) { char buf[512]; int last_color = 0; /* the color of the current line */ int eval_color = 2; /* recheck the color */ int initial_color = 1; int level = 0; int line = 0; /* the type of the line */ if (flags & TEXT2HTML_BODY_TAG) { sm_snprintf(buf,sizeof(buf),"<BODY BGCOLOR=\"#%06x\" TEXT=\"#%06x\" LINK=\"#%06x\">",user.config.read_background,user.config.read_text,user.config.read_link); string_append(&str,buf); } string_append(&str,fonttag); /* accepts NULL pointer */ /* check for >0, otherwise we'll be in an endless loop if buffer_len becomes <0 */ while (buffer_len > 0) { if (eval_color) { int new_level = 0; int buffer2_len = buffer_len; unsigned char *buffer2 = buffer; int new_color = 0; /* Determine the citation level. Afterwards, buffer2 will point to the end of the citation symbols. */ while (buffer2_len) { unsigned char c = *buffer2; if (c == '>') { new_level++; if (new_color == 1) new_color = 2; else new_color = 1; } else { if (c != ' ') break; if (c == ' ' && !new_level) break; } buffer2_len--; buffer2++; } if (user.config.read_graphical_quote_bar) { /* When graphical quote bar is enabled we skip all quotation symbols */ buffer = buffer2; buffer_len = buffer2_len; if (level != new_level) { char *begin_quote_string = "<TABLE BGCOLOR=\"#%06x\" WIDTH=\"100%%\" STYLE=\"border-left: 3px solid #%06x; border-right: 3px solid #%06x;\"><TD><FONT COLOR=\"#%06x\">"; char *end_quote_string = "</FONT></TD></TABLE>"; /* If new level is larger */ for (;level < new_level; level++) { unsigned int color = level%2?user.config.read_quoted:user.config.read_old_quoted; sm_snprintf(buf,sizeof(buf),begin_quote_string,user.config.read_quoted_background,color,color,color); string_append(&str,buf); } /* If new level is lower */ for (;level > new_level; level--) string_append(&str,end_quote_string); } } else { if (last_color != new_color) { char *begin_quote_string = "<FONT COLOR=\"#%x\">"; char *end_quote_string = "</FONT>"; if (!initial_color) string_append(&str,end_quote_string); if (new_color == 1) { sm_snprintf(buf,sizeof(buf),begin_quote_string,user.config.read_quoted); string_append(&str,buf); } else if (new_color == 2) { sm_snprintf(buf,sizeof(buf),begin_quote_string,user.config.read_old_quoted); string_append(&str,buf); } last_color = new_color; if (new_color) initial_color = 0; else initial_color = 1; } } eval_color = 0; } if (!mystrnicmp("http:",(char*)buffer,5)) write_uri(&buffer, &buffer_len, &str); else if (!mystrnicmp("mailto:",(char*)buffer,7)) write_uri(&buffer, &buffer_len, &str); else if (!mystrnicmp("ftp:",(char*)buffer,4)) write_uri(&buffer, &buffer_len, &str); else if (!mystrnicmp("https:",(char*)buffer,6)) write_uri(&buffer, &buffer_len, &str); else { unsigned char c; c = *buffer; if ((c == '@') && (buffer > saved_buffer)) { /* A @ has been encountered, check if this belongs to an email adresse by traversing back * within the string */ unsigned char *buffer2 = buffer - 1; unsigned char *buffer3; unsigned char c2; int buffer2_len = buffer_len + 1; char *address; while ((c2 = *buffer2) && buffer2 > saved_buffer) { static const char noaliaschars[] = { " ()<>@,;:\\\"[]\n\r"}; if (strchr(noaliaschars,c2)) { buffer2++; break; } buffer2_len++; buffer2--; } if ((buffer3 = (unsigned char*)parse_addr_spec((char*)buffer2, &address))) { int email_len; /* crop the string to the beginning of the email address */ string_crop(&str,0,str.len - (buffer - buffer2)); buffer_len += buffer - buffer2; buffer -= buffer - buffer2; email_len = buffer3 - buffer; buffer_len -= email_len; buffer = buffer3; sm_snprintf(buf,sizeof(buf),"<A HREF=\"mailto:%s\"%s>",address, user.config.read_link_underlined?"":" STYLE=\"TEXT-DECORATION: none\""); string_append(&str,buf); write_unicode(address,&str); string_append(&str,"</A>"); free(address); continue; } } if (user.config.read_smilies) { int i; int smily_used = 0; /* No look into the smily table, this is slow and needs to be improved */ for (i=0;i<sizeof(smily)/sizeof(struct smily);i++) { if (!strncmp(smily[i].ascii,(char*)buffer,strlen(smily[i].ascii))) { buffer += strlen(smily[i].ascii); buffer_len -= strlen(smily[i].ascii); sm_snprintf(buf,sizeof(buf),"<IMG SRC=\"PROGDIR:Images/%s\" VALIGN=\"middle\" ALT=\"%s\">",smily[i].gfx,smily[i].ascii); string_append(&str,buf); smily_used = 1; } } if (smily_used) continue; } if (!strncmp("\n<sb>",(char*)buffer,5)) { if (line) string_append(&str,"<BR></TD><TD WIDTH=\"50%\"><HR></TD></TR></TABLE>"); line = 1; buffer += 5; buffer_len -= 5; string_append(&str,"<TABLE WIDTH=\"100%\" BORDER=\"0\"><TR><TD VALIGN=\"middle\" WIDTH=\"50%\"><HR></TD><TD>"); continue; } if (!strncmp("\n<tsb>",(char*)buffer,6)) { if (line) string_append(&str,"<BR></TD><TD WIDTH=\"50%\"><HR></TD></TR></TABLE>"); line = 2; buffer += 6; buffer_len -= 6; string_append(&str,"<TABLE WIDTH=\"100%\" BORDER=\"0\"><TR><TD VALIGN=\"middle\" WIDTH=\"50%\"><HR></TD><TD>"); continue; } if (c < 128) { buffer++; buffer_len--; if (c== '<') string_append(&str,"<"); else if (c== '>') string_append(&str,">"); else if (c== '&') string_append(&str,"&"); else if (c == 10) { eval_color = 1; string_append(&str,"<BR>\n"); if (line) { string_append(&str,"</TD><TD WIDTH=\"50%\"><HR></TD></TR></TABLE>"); line = 0; } } else { if (c == 32) { if (*buffer == 32 || (flags & TEXT2HTML_NOWRAP)) string_append(&str," "); else string_append(&str," "); } else { if (c) { string_append_part(&str,(char*)&c,1); } } } } else { unsigned int unicode; int len = 0; /* check if it really could be a utf8 char */ if (isLegalUTF8Sequence((utf8*)buffer, (utf8*)(buffer+buffer_len))) { len = utf8tochar((utf8*)buffer, &unicode, user.config.default_codeset); } if ((len == 0) || (len > buffer_len)) { /* something wrong with that utf8 sequence */ unicode = '?'; len = 1; } buffer_len -= len; buffer += len; if (unicode == 0) unicode = '_'; sm_snprintf(buf,sizeof(buf),"&#%d;",unicode); string_append(&str,buf); } } } if (fonttag) string_append(&str,"</FONT>"); if (flags & TEXT2HTML_ENDBODY_TAG) string_append(&str,"</BODY>"); SM_DEBUGF(20,("%s\n",str.str)); return str.str; } return NULL; }