/**
 * Decode SGML character references in a NUL-terminated byte string, in place.
 *
 * Two forms are handled, both of which must be terminated by ';':
 *   - "&#NNN;"  decimal reference: replaced by the byte NNN when it lies in
 *               0..255, otherwise by a single space;
 *   - "&name;"  named entity (ASCII letters only): replaced by the value
 *               returned by DpsSgmlToUni() narrowed to char; when that
 *               narrowed value is zero the text is left unchanged.
 * At most DPS_MAX_SGML_LEN characters are scanned while looking for the
 * end of a reference.  The replacement byte itself is never re-scanned.
 *
 * @param str  string to decode; modified in place (it can only get shorter).
 * @return     str.
 */
__C_LINK char * __DPSCALL DpsSGMLUnescape(char * str){
  char *s = str;
  char *e;
  char c;

  while (*s) {
    if (*s == '&') {
      if (*(s + 1) == '#') {
        /* Numeric reference: find the end of the digit run. */
        for (e = s + 2; (e - s < DPS_MAX_SGML_LEN) && (*e <= '9') && (*e >= '0'); e++);
        if (*e == ';') {
          int v = atoi(s + 2);
          if (v >= 0 && v <= 255) {
            *s = (char)v;
          } else {
            *s = ' ';
          }
          /* Close the gap left by the reference, terminator included. */
          dps_memmove(s + 1, e + 1, dps_strlen(e + 1) + 1);
        }
      } else {
        /* Named entity: find the end of the letter run. */
        for (e = s + 1; (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A'))); e++);
        if (*e == ';') {
          /* Terminate the name only for the duration of the lookup and
             always put the ';' back, so that an unknown entity does not
             truncate the caller's string. */
          *e = '\0';
          c = (char)DpsSgmlToUni(s + 1);
          *e = ';';
          if (c) {
            *s = c;
            dps_memmove(s + 1, e + 1, dps_strlen(e + 1) + 1);
          }
        }
      }
    }
    /* Advance exactly one position: the character that follows a decoded
       entity must be examined too (it may start another entity). */
    s++;
  }
  return str;
}
/**
 * Replace SGML entities in a zero-terminated dpsunicode_t string with the
 * code points they stand for.  The string is rewritten in place.
 *
 * Recognized forms (both must end with ';'):
 *   - "&#NNN;"  decimal reference, converted with atoi();
 *   - "&name;"  named entity made of ASCII letters, resolved through
 *               DpsSgmlToUni(); a zero result leaves the text untouched.
 * The scan for the end of a reference stops after DPS_MAX_SGML_LEN
 * characters.
 *
 * @param ustr  string to decode; modified in place.
 */
void DpsSGMLUniUnescape(dpsunicode_t *ustr) {
  char name[DPS_MAX_SGML_LEN + 1];
  dpsunicode_t *cur, *stop, code;
  int len;

  for (cur = ustr; *cur; cur++) {
    if (*cur != '&') continue;

    if (cur[1] == '#') {
      /* Numeric reference: locate the end of the digit run. */
      stop = cur + 2;
      while ((stop - cur < DPS_MAX_SGML_LEN) && (*stop >= '0') && (*stop <= '9')) stop++;
      if (*stop != ';') continue;

      /* Narrow the digits into a char buffer so atoi() can parse them. */
      for (len = 0; cur + 2 + len < stop; len++) name[len] = cur[2 + len];
      name[len] = '\0';
      *cur = atoi(name);
    } else {
      /* Named entity: collect the letters while scanning for the end. */
      len = 0;
      for (stop = cur + 1;
           (stop - cur < DPS_MAX_SGML_LEN) &&
           (((*stop >= 'a') && (*stop <= 'z')) || ((*stop >= 'A') && (*stop <= 'Z')));
           stop++) {
        name[len++] = (char)*stop;
      }
      name[len] = '\0';
      if (*stop != ';') continue;

      code = DpsSgmlToUni(name);
      if (!code) continue;
      *cur = code;
    }

    /* Pull the remainder of the string (terminator included) up against
       the decoded code point. */
    dps_memmove(cur + 1, stop + 1, sizeof(dpsunicode_t) * (DpsUniLen(stop + 1) + 1));
  }
}
/**
 * Decode one character starting at `s` from TSCII into Unicode.
 *
 * Bytes below 0x80 are passed through unchanged, except that a character
 * reference is decoded when `s` starts with '&' and DPS_RECODE_HTML_FROM or
 * DPS_RECODE_URL_FROM is set, or starts with '!' and DPS_RECODE_URL_FROM is
 * set:
 *   - "#NNN" / "#xHHH" numeric references are parsed with sscanf();
 *   - otherwise, unless DPS_RECODE_TEXT_FROM is set, a run of ASCII letters
 *     terminated by ';' is resolved through DpsSgmlToUni().
 * A reference that yields 0 is not consumed; the leading byte is then
 * returned as an ordinary character.  After a successful decode any
 * following alphanumerics and one optional ';' are consumed.
 *
 * Bytes >= 0x80 are expanded through len_tscii / tab_tscii / tab2_tscii into
 * up to three code points.
 *
 * @param conv  conversion state: `flags` is read; `icodes` receives the
 *              number of input bytes consumed and `ocodes` the number of
 *              code points produced (for named entities: the value returned
 *              by DpsSgmlToUni()).
 * @param cs    not used by this function.
 * @param pwc   output buffer for the decoded code point(s).
 * @param s     input bytes; never written to.
 * @param end   not used by this function.
 * @return      number of input bytes consumed.
 */
int dps_mb_wc_tscii(DPS_CONV *conv, DPS_CHARSET *cs, dpsunicode_t *pwc, const unsigned char *s, const unsigned char *end) {
  int hi = s[0];

  conv->icodes = conv->ocodes = 1;

  if (hi < 0x80) {
    if ( (*s == '&' && ((conv->flags & DPS_RECODE_HTML_FROM) || (conv->flags & DPS_RECODE_URL_FROM)) ) ||
         (*s == '!' && (conv->flags & DPS_RECODE_URL_FROM)) ) {
      const unsigned char *p;

      if (s[1] == '#') {
        /* Start from 0 so that a reference without digits decodes to
           "nothing" instead of reading an indeterminate value. */
        unsigned int sw = 0;

        p = s + 2;
        if (s[2] == 'x' || s[2] == 'X') sscanf((const char *)(s + 3), "%x", &sw);
        else sscanf((const char *)(s + 2), "%u", &sw);
        *pwc = (dpsunicode_t)sw;
      } else {
        p = s + 1;
        if (conv->flags & DPS_RECODE_TEXT_FROM) {
          *pwc = 0;
        } else {
          /* Copy the entity name into a local buffer; the input is const
             and must not be patched with a temporary terminator. */
          char name[DPS_MAX_SGML_LEN + 1];
          const unsigned char *e;
          int len = 0;

          for (e = s + 1;
               (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A')));
               e++) {
            name[len++] = (char)*e;
          }
          name[len] = '\0';

          if (*e == ';') {
            int n = DpsSgmlToUni(name, pwc);
            if (n == 0) *pwc = 0;
            else conv->ocodes = n;
          } else {
            *pwc = 0;
          }
        }
      }

      if (*pwc) {
        /* Skip the body of the reference and its optional ';'. */
        for (; isalpha(*p) || isdigit(*p); p++);
        if (*p == ';') p++;
        return conv->icodes = (p - s);
      }
    }
    pwc[0] = hi;
    return 1;
  }

  /* Multi-code-point expansion: each case adds one output code point and
     deliberately continues into the next. */
  switch (len_tscii[hi]) {
    case 3:
      pwc[2] = 0x0B82;
      conv->ocodes++;
      /* fallthrough */
    case 2:
      pwc[1] = tab2_tscii[hi];
      conv->ocodes++;
      /* fallthrough */
    case 1:
      pwc[0] = tab_tscii[hi];
      break;
    default:
      break;
  }
  return 1;
}
/**
 * Decode one character starting at `str` from an 8-bit charset into Unicode.
 *
 * Processing order:
 *  1. Character references.  When `str` starts with '&' and
 *     DPS_RECODE_HTML_FROM or DPS_RECODE_URL_FROM is set, or starts with '!'
 *     and DPS_RECODE_URL_FROM is set:
 *       - "#NNN" / "#xHHH" numeric references are parsed with sscanf().  If
 *         the value is a single byte above 0x20 whose DpsUniCType() class is
 *         >= DPS_UNI_OTHER_C, it is re-read as a byte of this charset via
 *         cs->tab_to_uni and that mapping is used when its class is
 *         < DPS_UNI_OTHER_C (work-around for bogus entity escaping);
 *       - otherwise, unless DPS_RECODE_TEXT_FROM is set, a run of ASCII
 *         letters terminated by ';' is resolved through DpsSgmlToUni().
 *     A non-zero result consumes the reference, any following alphanumerics
 *     and one optional ';'.
 *  2. JSON escapes.  When `str` starts with '\\' and DPS_RECODE_JSON_FROM is
 *     set, DpsJSONToUni() is tried on the text after the backslash.
 *  3. Plain table lookup of the single byte through cs->tab_to_uni.
 *
 * @param conv  conversion state: `flags` is read; `icodes` receives the
 *              number of input bytes consumed, `ocodes` the number of code
 *              points produced (as reported by the helper that decoded them).
 * @param cs    charset descriptor; `tab_to_uni` is used for byte lookups.
 * @param wc    output buffer for the decoded code point(s).
 * @param str   input bytes; never written to.
 * @param end   not used by this function.
 * @return      number of input bytes consumed, or DPS_CHARSET_ILSEQ when a
 *              non-zero byte has no mapping in cs->tab_to_uni.
 */
__C_LINK int __DPSCALL dps_mb_wc_8bit(DPS_CONV *conv, DPS_CHARSET *cs, dpsunicode_t *wc, const unsigned char *str, const unsigned char *end) {
  int n;

  conv->ocodes = 1;

  if ( (*str == '&' && ((conv->flags & DPS_RECODE_HTML_FROM) || (conv->flags & DPS_RECODE_URL_FROM)) ) ||
       (*str == '!' && (conv->flags & DPS_RECODE_URL_FROM)) ) {
    const unsigned char *p;

    if (str[1] == '#') {
      /* Start from 0 so that a reference without digits decodes to
         "nothing" instead of reading an indeterminate value. */
      unsigned int sw = 0;

      p = str + 2;
      if (str[2] == 'x' || str[2] == 'X') sscanf((const char *)(str + 3), "%x", &sw);
      else sscanf((const char *)(str + 2), "%u", &sw);
      *wc = (dpsunicode_t)sw;

      if (sw < 256 && sw > 0x20 && DpsUniCType(*wc) >= DPS_UNI_OTHER_C) {
        /* try to resolve bogus ENTITY escaping */
        dpsunicode_t sv = cs->tab_to_uni[sw];
        if (DpsUniCType(sv) < DPS_UNI_OTHER_C) *wc = sv;
      }
    } else {
      p = str + 1;
      if (conv->flags & DPS_RECODE_TEXT_FROM) {
        *wc = 0;
      } else {
        /* Copy the entity name into a local buffer; the input is const
           and must not be patched with a temporary terminator. */
        char name[DPS_MAX_SGML_LEN + 1];
        const unsigned char *e;
        int len = 0;

        for (e = str + 1;
             (e - str < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A')));
             e++) {
          name[len++] = (char)*e;
        }
        name[len] = '\0';

        if (*e == ';') {
          n = DpsSgmlToUni(name, wc);
          if (n == 0) *wc = 0;
          else conv->ocodes = (size_t)n;
        } else {
          *wc = 0;
        }
      }
    }

    if (*wc) {
      /* Skip the body of the reference and its optional ';'. */
      for (; isalpha(*p) || isdigit(*p); p++);
      if (*p == ';') p++;
      return conv->icodes = (size_t)(p - str);
    }
  }

  if (*str == '\\' && (conv->flags & DPS_RECODE_JSON_FROM)) {
    n = DpsJSONToUni((const char *)str + 1, wc, &conv->icodes);
    if (n) {
      conv->ocodes = n;
      /* Account for the leading backslash as well. */
      return ++conv->icodes;
    }
  }

  conv->icodes = 1;
  *wc = cs->tab_to_uni[*str];
  return (!wc[0] && str[0]) ? DPS_CHARSET_ILSEQ : 1;
}
/**
 * Decode one character starting at `s` from the Gujarati 8-bit encoding
 * into Unicode.
 *
 * Bytes below 0x80 are passed through unchanged, except that a character
 * reference is decoded when `s` starts with '&' and DPS_RECODE_HTML_FROM or
 * DPS_RECODE_URL_FROM is set, or starts with '!' and DPS_RECODE_URL_FROM is
 * set:
 *   - "#NNN" / "#xHHH" numeric references are parsed with sscanf();
 *   - otherwise, unless DPS_RECODE_TEXT_FROM is set, a run of ASCII letters
 *     terminated by ';' is resolved through DpsSgmlToUni().
 * A reference that yields 0 is not consumed; the leading byte is then
 * returned as an ordinary character.
 *
 * Bytes >= 0x80 are looked up in tab_gujarati, with these two-byte
 * sequences recognized when a second byte is available before `end`:
 *   0xA1 0xE9 -> U+0AD0,  0xAA 0xE9 -> U+0AE0,  0xDF 0xE9 -> U+0AC4,
 *   0xE8 0xE8 -> U+0ACD U+200C,  0xE8 0xE9 -> U+0ACD U+200D.
 *
 * @param conv  conversion state: `flags` is read; `icodes` receives the
 *              number of input bytes consumed and `ocodes` the number of
 *              code points produced (for named entities: the value returned
 *              by DpsSgmlToUni()).
 * @param cs    not used by this function.
 * @param pwc   output buffer for the decoded code point(s).
 * @param s     input bytes; never written to.
 * @param end   one past the last readable input byte; consulted only for
 *              the two-byte sequences.
 * @return      number of input bytes consumed.
 */
int dps_mb_wc_gujarati(DPS_CONV *conv, DPS_CHARSET *cs, dpsunicode_t *pwc, const unsigned char *s, const unsigned char *end) {
  int hi = s[0];

  conv->icodes = conv->ocodes = 1;

  if (hi < 0x80) {
    if ( (*s == '&' && ((conv->flags & DPS_RECODE_HTML_FROM) || (conv->flags & DPS_RECODE_URL_FROM)) ) ||
         (*s == '!' && (conv->flags & DPS_RECODE_URL_FROM)) ) {
      const unsigned char *p;

      if (s[1] == '#') {
        /* Start from 0 so that a reference without digits decodes to
           "nothing" instead of reading an indeterminate value. */
        unsigned int sw = 0;

        p = s + 2;
        if (s[2] == 'x' || s[2] == 'X') sscanf((const char *)(s + 3), "%x", &sw);
        else sscanf((const char *)(s + 2), "%u", &sw);
        *pwc = (dpsunicode_t)sw;
      } else {
        p = s + 1;
        if (conv->flags & DPS_RECODE_TEXT_FROM) {
          *pwc = 0;
        } else {
          /* Copy the entity name into a local buffer; the input is const
             and must not be patched with a temporary terminator. */
          char name[DPS_MAX_SGML_LEN + 1];
          const unsigned char *e;
          int len = 0;

          for (e = s + 1;
               (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A')));
               e++) {
            name[len++] = (char)*e;
          }
          name[len] = '\0';

          if (*e == ';') {
            int n = DpsSgmlToUni(name, pwc);
            if (n == 0) *pwc = 0;
            else conv->ocodes = n;
          } else {
            *pwc = 0;
          }
        }
      }

      if (*pwc) {
        /* Skip the body of the reference and its optional ';'. */
        for (; isalpha(*p) || isdigit(*p); p++);
        if (*p == ';') p++;
        return conv->icodes = (p - s);
      }
    }
    pwc[0] = hi;
    return 1;
  }

  /* Lead bytes that combine with a following 0xE9 into one code point;
     without that follower they fall back to the plain table entry. */
  if (hi == 0xA1) {
    if ((s + 2 > end) || (s[1] != 0xE9)) {
      pwc[0] = tab_gujarati[0xA1];
      return 1;
    }
    pwc[0] = 0x0AD0;
    return conv->icodes = 2;
  }
  if (hi == 0xAA) {
    if ((s + 2 > end) || (s[1] != 0xE9)) {
      pwc[0] = tab_gujarati[0xAA];
      return 1;
    }
    pwc[0] = 0x0AE0;
    return conv->icodes = 2;
  }
  if (hi == 0xDF) {
    if ((s + 2 > end) || (s[1] != 0xE9)) {
      pwc[0] = tab_gujarati[0xDF];
      return 1;
    }
    pwc[0] = 0x0AC4;
    return conv->icodes = 2;
  }

  /* 0xE8 followed by 0xE8 or 0xE9 produces two code points:
     U+0ACD plus U+200C or U+200D respectively. */
  if (hi == 0xE8) {
    if ((s + 2 > end) || ((s[1] != 0xE8) && (s[1] != 0xE9))) {
      pwc[0] = tab_gujarati[0xE8];
      return 1;
    }
    pwc[0] = 0x0ACD;
    pwc[1] = (s[1] == 0xE8) ? 0x200C : 0x200D;
    return conv->icodes = conv->ocodes = 2;
  }

  pwc[0] = tab_gujarati[hi];
  return 1;
}