Exemple #1
0
/**
 * Decode SGML character references in a NUL-terminated string, in place.
 *
 * Two forms are handled:
 *   - decimal numeric references "&#NNN;" with at least one digit: values
 *     1..255 are replaced by that single byte, every other value (including
 *     0, which would otherwise terminate the string) by a space;
 *   - named references "&name;" made of ASCII letters: the name is passed to
 *     DpsSgmlToUni() and, when the result narrowed to char is non-zero, the
 *     whole reference is replaced by that char.
 * Any other text, including unknown names and references without a closing
 * ';', is left unchanged. The scan for digits/letters stops after
 * DPS_MAX_SGML_LEN characters.
 *
 * @param str  string to rewrite; the result is never longer than the input.
 * @return     str.
 */
__C_LINK char * __DPSCALL DpsSGMLUnescape(char * str){
  char *s = str, *e, c;

  while (*s) {
    if (*s == '&') {
      if (*(s + 1) == '#') {
        for (e = s + 2; (e - s < DPS_MAX_SGML_LEN) && (*e <= '9') && (*e >= '0'); e++);
        /* Require at least one digit: "&#;" is not a reference and must not
           be turned into a NUL byte by atoi() returning 0. */
        if (*e == ';' && e > s + 2) {
          int v = atoi(s + 2);
          /* 0 is excluded so that "&#0;" cannot truncate the string. */
          *s = (v > 0 && v <= 255) ? (char)v : ' ';
          /* Close the gap left by the reference, terminator included. */
          dps_memmove(s + 1, e + 1, dps_strlen(e + 1) + 1);
        }
      } else {
        for (e = s + 1; (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A'))); e++);
        if (*e == ';') {
          /* Terminate the name only for the duration of the lookup and
             always restore the ';', so an unknown entity leaves the string
             intact instead of cutting it short. */
          *e = '\0';
          c = (char)DpsSgmlToUni(s + 1);
          *e = ';';
          if (c) {
            *s = c;
            dps_memmove(s + 1, e + 1, dps_strlen(e + 1) + 1);
          }
        }
      }
    }
    /* Step past the current (possibly just decoded) character only; the
       character that follows a reference must still be examined. */
    s++;
  }
  return(str);
}
Exemple #2
0
/**
 * Replace SGML character references in a zero-terminated dpsunicode_t string
 * with the code points they denote, in place.
 *
 * Two forms are handled:
 *   - decimal numeric references "&#NNN;" with at least one digit: replaced
 *     by the value of NNN, or by a space when that value is 0 (storing 0
 *     would terminate the string early);
 *   - named references "&name;" made of ASCII letters: the name is passed to
 *     DpsSgmlToUni() and the reference is replaced when the result is
 *     non-zero.
 * Everything else is left unchanged. The scan for digits/letters stops after
 * DPS_MAX_SGML_LEN characters, which also bounds the local name buffer.
 *
 * @param ustr  string to rewrite; the result is never longer than the input.
 */
void DpsSGMLUniUnescape(dpsunicode_t *ustr) {
  dpsunicode_t *s = ustr, *e, c;
  char sgml[DPS_MAX_SGML_LEN+1];

  while (*s) {
    if (*s == '&') {
      int i = 0;
      if (*(s + 1) == '#') {
        /* Collect the decimal digits as a narrow string for atoi(). */
        for (e = s + 2; (e - s < DPS_MAX_SGML_LEN) && (*e <= '9') && (*e >= '0'); e++) {
          sgml[i] = (char)*e;
          i++;
        }
        sgml[i] = '\0';
        /* "&#;" has no digits and is not a reference; leave it alone. */
        if ((*e == ';') && (i > 0)) {
          c = (dpsunicode_t)atoi(sgml);
          *s = c ? c : ' ';
          /* Close the gap left by the reference, terminator included. */
          dps_memmove(s + 1, e + 1, sizeof(dpsunicode_t) * (DpsUniLen(e + 1) + 1));
        }
      } else {
        /* Collect the entity name as a narrow string for the lookup. */
        for (e = s + 1; (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A'))); e++) {
          sgml[i] = (char)*e;
          i++;
        }
        sgml[i] = '\0';
        if ((*e == ';') && (c = DpsSgmlToUni(sgml))) {
          *s = c;
          dps_memmove(s + 1, e + 1, sizeof(dpsunicode_t) * (DpsUniLen(e + 1) + 1));
        }
      }
    }
    s++;
  }
}
/**
 * Convert the character starting at s into Unicode code point(s) in pwc,
 * using the TSCII tables (len_tscii, tab_tscii, tab2_tscii).
 *
 * For bytes below 0x80:
 *   - when the byte is '&' and DPS_RECODE_HTML_FROM or DPS_RECODE_URL_FROM
 *     is set, or the byte is '!' and DPS_RECODE_URL_FROM is set, an SGML
 *     character reference is tried: "#NNN" / "#xHHH" numeric forms, or a
 *     named form resolved with DpsSgmlToUni() (the named form is skipped
 *     when DPS_RECODE_TEXT_FROM is set). On success the number of input
 *     bytes consumed is stored in conv->icodes and returned;
 *   - otherwise the byte is copied to pwc[0] unchanged.
 * For bytes >= 0x80, len_tscii[hi] selects how many code points are written
 * (up to three) and conv->ocodes is raised accordingly.
 *
 * The input buffer is only read; the entity name is copied to a local buffer
 * for the lookup. The bytes after s are read without consulting `end`
 * — NOTE(review): presumably the input is NUL-terminated; confirm at callers.
 *
 * @param conv  conversion state; flags are read, icodes/ocodes are written.
 * @param cs    unused here.
 * @param pwc   output code points.
 * @param s     current input position.
 * @param end   unused here.
 * @return      number of input bytes consumed.
 */
int dps_mb_wc_tscii(DPS_CONV *conv, DPS_CHARSET *cs, dpsunicode_t *pwc, const unsigned char *s, const unsigned char *end) {

  int hi;
  const unsigned char *p;
  const unsigned char *e;
  char name[DPS_MAX_SGML_LEN + 1];
  /* Zero-initialised so that a failed sscanf() yields "no reference"
     instead of an indeterminate value. */
  unsigned int sw = 0;
  int n, i;

  hi = s[0];
  conv->icodes = conv->ocodes = 1;

  if (hi < 0x80) {
    if ( (*s == '&' && ((conv->flags & DPS_RECODE_HTML_FROM) || (conv->flags & DPS_RECODE_URL_FROM)) ) ||
         (*s == '!' && (conv->flags & DPS_RECODE_URL_FROM)) ) {
      if (s[1] == '#') {
        p = s + 2;
        if (s[2] == 'x' || s[2] == 'X') sscanf((const char*)(s + 3), "%x", &sw);
        else sscanf((const char*)(s + 2), "%u", &sw);
        *pwc = (dpsunicode_t)sw;
      } else {
        p = s + 1;
        if (!(conv->flags & DPS_RECODE_TEXT_FROM)) {
          /* Copy the candidate name instead of patching a '\0' into the
             caller's const buffer. */
          i = 0;
          for (e = s + 1; (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A'))); e++) {
            name[i++] = (char)*e;
          }
          name[i] = '\0';
          if (*e == ';') {
            n = DpsSgmlToUni(name, pwc);
            if (n == 0) *pwc = 0;
            else conv->ocodes = n;
          } else *pwc = 0;
        } else *pwc = 0;
      }
      if (*pwc) {
        /* Skip the reference body and an optional closing ';'. */
        for (; isalpha(*p) || isdigit(*p); p++);
        if (*p == ';') p++;
        return conv->icodes = (p - s /*+ 1*/);
      }
    }
    /* Not a (valid) reference: pass the byte through. */
    pwc[0] = hi;
    return 1;
  }
  switch(len_tscii[hi]) {
  case 3:
    pwc[2] = 0x0B82;
    conv->ocodes++;
    /* fallthrough */
  case 2:
    pwc[1] = tab2_tscii[hi];
    conv->ocodes++;
    /* fallthrough */
  case 1:
    pwc[0] = tab_tscii[hi];
    break;
  default:
    break;
  }
  return 1;
}
/**
 * Convert the character starting at str into Unicode code point(s) in wc,
 * using the charset's tab_to_uni table.
 *
 * Processing order:
 *   1. When the byte is '&' and DPS_RECODE_HTML_FROM or DPS_RECODE_URL_FROM
 *      is set, or the byte is '!' and DPS_RECODE_URL_FROM is set, an SGML
 *      character reference is tried: "#NNN" / "#xHHH" numeric forms, or a
 *      named form resolved with DpsSgmlToUni() (the named form is skipped
 *      when DPS_RECODE_TEXT_FROM is set). A numeric value in 0x21..0xFF whose
 *      DpsUniCType() is >= DPS_UNI_OTHER_C is re-mapped through
 *      cs->tab_to_uni when that gives a code point of a lower class.
 *   2. When the byte is '\\' and DPS_RECODE_JSON_FROM is set, DpsJSONToUni()
 *      is tried on the following bytes.
 *   3. Otherwise the byte is mapped through cs->tab_to_uni.
 *
 * The input buffer is only read; the entity name is copied to a local buffer
 * for the lookup. The bytes after str are read without consulting `end`
 * — NOTE(review): presumably the input is NUL-terminated; confirm at callers.
 *
 * @param conv  conversion state; flags are read, icodes/ocodes are written.
 * @param cs    charset providing tab_to_uni.
 * @param wc    output code points.
 * @param str   current input position.
 * @param end   unused here.
 * @return      number of input bytes consumed, or DPS_CHARSET_ILSEQ when a
 *              non-zero byte maps to 0 in tab_to_uni.
 */
__C_LINK int __DPSCALL dps_mb_wc_8bit(DPS_CONV *conv, DPS_CHARSET *cs, dpsunicode_t *wc,
				      const unsigned char *str, const unsigned char *end) {

  const unsigned char *p;
  const unsigned char *e;
  char name[DPS_MAX_SGML_LEN + 1];
  /* Zero-initialised so that a failed sscanf() yields "no reference"
     instead of an indeterminate value (which was also used as a table
     index below). */
  unsigned int sw = 0;
  int n, i;

  conv->ocodes = 1;

  if ( (*str == '&' && ((conv->flags & DPS_RECODE_HTML_FROM)||(conv->flags & DPS_RECODE_URL_FROM)) ) ||
       (*str == '!' && (conv->flags & DPS_RECODE_URL_FROM)) ) {
    if (str[1] == '#') {
      p = str + 2;
      if (str[2] == 'x' || str[2] == 'X') sscanf((const char*)(str + 3), "%x", &sw);
      else sscanf((const char*)(str + 2), "%u", &sw);
      *wc = (dpsunicode_t)sw;
      if (sw < 256 && sw > 0x20 && DpsUniCType(*wc) >= DPS_UNI_OTHER_C) { /* try to resolve bogus ENTITY escaping */
        dpsunicode_t sv = cs->tab_to_uni[sw];
        if (DpsUniCType(sv) < DPS_UNI_OTHER_C) *wc = sv;
      }
    } else {
      p = str + 1;
      if (!(conv->flags & DPS_RECODE_TEXT_FROM)) {
        /* Copy the candidate name instead of casting away const and
           patching a '\0' into the caller's buffer. */
        i = 0;
        for (e = str + 1; (e - str < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A'))); e++) {
          name[i++] = (char)*e;
        }
        name[i] = '\0';
        if (*e == ';') {
          n = DpsSgmlToUni(name, wc);
          if (n == 0) *wc = 0;
          else conv->ocodes = (size_t)n;
        } else *wc = 0;
      } else *wc = 0;
    }
    if (*wc) {
      /* Skip the reference body and an optional closing ';'. */
      for (; isalpha(*p) || isdigit(*p); p++);
      if (*p == ';') p++;
      return conv->icodes = (size_t)(p - str /*+ 1*/);
    }
  }
  if ( *str == '\\' && (conv->flags & DPS_RECODE_JSON_FROM)) {
    n = DpsJSONToUni((const char*)str + 1, wc, &conv->icodes);
    if (n) {
      conv->ocodes = n;
      /* Account for the leading backslash as well. */
      return ++conv->icodes;
    }
  }

  conv->icodes = 1;
  *wc = cs->tab_to_uni[*str];
  return (!wc[0] && str[0]) ? DPS_CHARSET_ILSEQ : 1;
}
/**
 * Convert the character starting at s into Unicode code point(s) in pwc,
 * using the tab_gujarati table.
 *
 * For bytes below 0x80:
 *   - when the byte is '&' and DPS_RECODE_HTML_FROM or DPS_RECODE_URL_FROM
 *     is set, or the byte is '!' and DPS_RECODE_URL_FROM is set, an SGML
 *     character reference is tried: "#NNN" / "#xHHH" numeric forms, or a
 *     named form resolved with DpsSgmlToUni() (the named form is skipped
 *     when DPS_RECODE_TEXT_FROM is set). On success the number of input
 *     bytes consumed is stored in conv->icodes and returned;
 *   - otherwise the byte is copied to pwc[0] unchanged.
 * For bytes >= 0x80, four lead bytes combine with a following byte when at
 * least two bytes remain before `end`:
 *   0xA1 0xE9 -> 0x0AD0,  0xAA 0xE9 -> 0x0AE0,  0xDF 0xE9 -> 0x0AC4,
 *   0xE8 0xE8 -> 0x0ACD 0x200C,  0xE8 0xE9 -> 0x0ACD 0x200D.
 * Every other byte is mapped through tab_gujarati.
 *
 * The input buffer is only read; the entity name is copied to a local buffer
 * for the lookup. The reference parser reads bytes after s without
 * consulting `end` — NOTE(review): presumably the input is NUL-terminated;
 * confirm at callers.
 *
 * @param conv  conversion state; flags are read, icodes/ocodes are written.
 * @param cs    unused here.
 * @param pwc   output code points.
 * @param s     current input position.
 * @param end   one past the last readable input byte (used for the two-byte
 *              sequences only).
 * @return      number of input bytes consumed.
 */
int dps_mb_wc_gujarati(DPS_CONV *conv, DPS_CHARSET *cs, dpsunicode_t *pwc, const unsigned char *s, const unsigned char *end) {

  int hi;
  const unsigned char *p;
  const unsigned char *e;
  char name[DPS_MAX_SGML_LEN + 1];
  /* Zero-initialised so that a failed sscanf() yields "no reference"
     instead of an indeterminate value. */
  unsigned int sw = 0;
  int n, i;

  hi = s[0];
  conv->icodes = conv->ocodes = 1;

  if (hi < 0x80) {
    if ( (*s == '&' && ((conv->flags & DPS_RECODE_HTML_FROM) || (conv->flags & DPS_RECODE_URL_FROM)) ) ||
         (*s == '!' && (conv->flags & DPS_RECODE_URL_FROM)) ) {
      if (s[1] == '#') {
        p = s + 2;
        if (s[2] == 'x' || s[2] == 'X') sscanf((const char*)(s + 3), "%x", &sw);
        else sscanf((const char*)(s + 2), "%u", &sw);
        *pwc = (dpsunicode_t)sw;
      } else {
        p = s + 1;
        if (!(conv->flags & DPS_RECODE_TEXT_FROM)) {
          /* Copy the candidate name instead of patching a '\0' into the
             caller's const buffer. */
          i = 0;
          for (e = s + 1; (e - s < DPS_MAX_SGML_LEN) && (((*e <= 'z') && (*e >= 'a')) || ((*e <= 'Z') && (*e >= 'A'))); e++) {
            name[i++] = (char)*e;
          }
          name[i] = '\0';
          if (*e == ';') {
            n = DpsSgmlToUni(name, pwc);
            if (n == 0) *pwc = 0;
            else conv->ocodes = n;
          } else *pwc = 0;
        } else *pwc = 0;
      }
      if (*pwc) {
        /* Skip the reference body and an optional closing ';'. */
        for (; isalpha(*p) || isdigit(*p); p++);
        if (*p == ';') p++;
        return conv->icodes = (p - s /*+ 1*/);
      }
    }
    /* Not a (valid) reference: pass the byte through. */
    pwc[0] = hi;
    return 1;
  }

  if (hi == 0xA1) {
    if ((s + 2 > end) || (s[1] != 0xE9)) {
      pwc[0] = tab_gujarati[0xA1];
      return 1;
    }
    pwc[0] = 0x0AD0;
    return conv->icodes = 2;
  }

  if (hi == 0xAA) {
    if ((s + 2 > end) || (s[1] != 0xE9)) {
      pwc[0] = tab_gujarati[0xAA];
      return 1;
    }
    pwc[0] = 0x0AE0;
    return conv->icodes = 2;
  }

  if (hi == 0xDF) {
    if ((s + 2 > end) || (s[1] != 0xE9)) {
      pwc[0] = tab_gujarati[0xDF];
      return 1;
    }
    pwc[0] = 0x0AC4;
    return conv->icodes = 2;
  }

  if (hi == 0xE8) {
    if ((s + 2 > end) || ((s[1] != 0xE8) && (s[1] != 0xE9))) {
      pwc[0] = tab_gujarati[0xE8];
      return 1;
    }
    /* Two input bytes produce two output code points. */
    pwc[0] = 0x0ACD;
    pwc[1] = (s[1] == 0xE8) ? 0x200C : 0x200D;
    return conv->icodes = conv->ocodes = 2;
  }

  pwc[0] = tab_gujarati[hi];
  return 1;
}