Exemple #1
0
// Measure the utf8 output that converting the multi-byte input at src
// (nsrc bytes) would produce.
//
// Each input character is decoded with mb2wc() and its utf8 size, as reported
// by utflen(), is added to the running total.  The total is returned once all
// nsrc bytes are consumed.  If mb2wc() returns a value <= 0 that value is
// returned immediately instead.  A null src or a non-positive nsrc yields 0.
FXint FXTextCodec::mb2utflen(const FXchar* src,FXint nsrc) const {
  FXint total=0;
  FXint consumed;
  FXwchar wc;
  if(!src) return total;
  while(0<nsrc){
    consumed=mb2wc(wc,src,nsrc);
    if(consumed<=0) return consumed;    // Decoder stopped: hand its result to the caller
    total+=utflen(wc);
    src+=consumed;
    nsrc-=consumed;
    }
  return total;
  }
Exemple #2
0
// Convert the multi-byte input at src (nsrc bytes) into utf8 stored at dst,
// where ndst is the space available in dst.
//
// Each input character is decoded with mb2wc() and re-encoded with wc2utf().
// Returns the sum of the wc2utf() results once all input is consumed.  If
// either mb2wc() or wc2utf() returns a value <= 0, that value is returned
// immediately.  Returns 0 when dst or src is null or nsrc is not positive.
FXint FXTextCodec::mb2utf(FXchar* dst,FXint ndst,const FXchar* src,FXint nsrc) const {
  FXint written=0;
  FXint consumed;
  FXint produced;
  FXwchar wc;
  if(!dst || !src) return written;
  while(0<nsrc){
    consumed=mb2wc(wc,src,nsrc);
    if(consumed<=0) return consumed;    // Decoder stopped: hand its result to the caller
    src+=consumed;
    nsrc-=consumed;
    produced=wc2utf(dst,ndst,wc);
    if(produced<=0) return produced;    // Encoder stopped: hand its result to the caller
    dst+=produced;
    ndst-=produced;
    written+=produced;
    }
  return written;
  }
Exemple #3
0
/* Command-line driver.
 *
 * Options (consumed while the current argument starts with '-'):
 *   -xml     select the XML dialect when no DTD is implied by the file name
 *   -s       pass output = FALSE to set_functions() and suppress the final "C"
 *   -nodefs  set SGML_PARSER_NODEFS on the parser
 *   -style   set style_messages
 * Any other option calls usage().
 *
 * The extension of the first non-option argument selects the DTD:
 *   .dtd        the file is loaded as DTD (doctype = file name without the
 *               extension) and the argument is consumed
 *   .html/.htm  "html.dtd" is loaded with doctype html
 *   .xml        (or -xml, or a program name of "xml") XML dialect, no DTD
 * With one remaining argument that file is parsed; with none, standard input
 * is parsed; with more, usage() is called and EXIT_FAILURE returned.
 */
int
main(int argc, char **argv)
{ dtd_parser *p = NULL;
  char *s;
  int xml = FALSE;
  int output = TRUE;
  int nodefs = FALSE;		/* -nodefs: sets SGML_PARSER_NODEFS below */

  setlocale(LC_CTYPE, "");

  /* Program name is what follows the LAST '/', so search from the right */
  s = strrchr(argv[0], '/');
  program = s == NULL ? argv[0] : s + 1;
  if (streq(program, "xml"))
    xml = TRUE;

  shift;

  while (argc > 0 && argv[0][0] == '-')
  { if (streq(argv[0], "-xml"))
    { xml = TRUE;
    } else if (streq(argv[0], "-s"))
    { output = FALSE;
    } else if (streq(argv[0], "-nodefs"))
    { nodefs = TRUE;
    } else if (streq(argv[0], "-style"))
    { style_messages = TRUE;
    } else
    { usage();
    }
    shift;
  }

  if (argc > 0)
  { /* The extension is the last '.' provided it lies after the last '/';
       a '.' inside a directory component (e.g. "./file") does not count */
    char *slash = strrchr(argv[0], '/');
    char *dot = strrchr(argv[0], '.');
    char *ext = dot == 0 || (slash != 0 && slash > dot) ? "." : dot;

    if (strcaseeq(ext, ".dtd"))
    { char doctype[256];
      size_t len = (size_t)(ext - argv[0]);	/* ext points into argv[0] here */

      /* Refuse names that do not fit rather than overflowing doctype[] */
      if (len >= sizeof(doctype))
      { fprintf(stderr, "%s: DTD file name too long: %s\n", program, argv[0]);
	return EXIT_FAILURE;
      }
      memcpy(doctype, argv[0], len);
      doctype[len] = '\0';

      p = new_dtd_parser(new_dtd(mb2wc(doctype)));
      load_dtd_from_file(p, mb2wc(argv[0]));
      shift;				/* the DTD file itself is not parsed as document */
    } else if (strcaseeq(ext, ".html") || strcaseeq(ext, ".htm"))
    { /* Wide literal, matching the wide strings passed to new_dtd() and
	 load_dtd_from_file() elsewhere in this function */
      p = new_dtd_parser(new_dtd(L"html"));
      load_dtd_from_file(p, L"html.dtd");
    } else if (xml || strcaseeq(ext, ".xml"))
    { dtd *dtd = new_dtd(no_dtd);

      set_dialect_dtd(dtd, DL_XML);
      p = new_dtd_parser(dtd);
    } else
    { p = new_dtd_parser(new_dtd(no_dtd));
    }
  } else
  { p = new_dtd_parser(new_dtd(no_dtd));
  }

  if (nodefs)
    p->flags |= SGML_PARSER_NODEFS;

  switch (argc)
  { case 1:				/* parse the named file */
    { set_functions(p, output);
      sgml_process_file(p, mb2wc(argv[0]), 0);
      free_dtd_parser(p);
      if (output && nerrors == 0)
	printf("C\n");
      return 0;
    }
    case 0:				/* parse standard input */
    { set_functions(p, output);
      set_file_dtd_parser(p, IN_FILE, L"stdin");
      set_mode_dtd_parser(p, DM_DATA);
      sgml_process_stream(p, stdin, 0);
      free_dtd_parser(p);
      if (output && nerrors == 0 && nwarnings == 0)
	printf("C\n");
      return 0;
    }
    default:
    { usage();
      return EXIT_FAILURE;
    }
  }
}
Exemple #4
0
/*
 * Copy the selected delimiter-separated fields of every line read from
 * fdin to fdout.
 *
 * Driven by the following members of *cut:
 *   space   per-byte class table: 0 for an ordinary byte, otherwise a class
 *           such as SP_WORD, SP_LINE or SP_WIDE (see the tests below)
 *   list    sequence of field counts.  A non-zero first entry is a run of
 *           fields that is NOT copied; a zero first entry means the next
 *           entry is a run that IS copied.  Each time a run is used up the
 *           next entry is fetched and copying is toggled.
 *           NOTE(review): how the list is terminated is not visible here.
 *   mb      non-zero selects the multibyte-aware scanner
 *   eob     byte stored over the last byte of every input buffer; the
 *           scanners have no bounds check of their own, so presumably eob is
 *           classified as SP_LINE in space[] -- confirm against the setup code
 *   sflag   when set, lines containing no field delimiter are not output
 *   wdelim.chr / ldelim.chr   field and line delimiter characters
 *
 * A line that spans input buffers and has shown no delimiter so far is
 * saved in a temporary stream (fdtmp) so that it can still be emitted whole
 * if it turns out to contain no delimiter at all.
 *
 * Nothing is returned; on a read/write failure processing stops and the
 * temporary stream, if any, is closed.
 */
static void
cutfields(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
{
	register unsigned char *sp = cut->space;
	register unsigned char *cp;
	register unsigned char *wp;
	register int c, nfields;
	register const int *lp = cut->list;
	register unsigned char *copy;
	register int nodelim, empty, inword=0;
	register unsigned char *ep;
	unsigned char *bp, *first;
	int lastchar;
	wchar_t w;
	Sfio_t *fdtmp = 0;
	long offset = 0;
	unsigned char mb[8];
	/* process each buffer */
	while ((bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) && (c = sfvalue(fdin)) > 0)
	{
		cp = bp;
		ep = cp + --c;
		/* remember the real last byte and plant the end-of-buffer marker over it */
		if((lastchar = cp[c]) != cut->eob)
			*ep = cut->eob;
		/* process each line in the buffer */
		while (cp <= ep)
		{
			first = cp;
			if (!inword)
			{
				/* start of a new line: reset per-line state and rewind the field list */
				nodelim = empty = 1;
				copy = cp;
				if (nfields = *(lp = cut->list))
					copy = 0;
				else
					nfields = *++lp;
			}
			else if (copy)
				/* continuing a line from the previous buffer while copying */
				copy = cp;
			inword = 0;
			do
			{
				/* skip over non-delimiter characters */
				if (cut->mb)
					for (;;)
					{
						switch (c = sp[*(unsigned char*)cp++])
						{
						case 0:
							continue;
						case SP_WIDE:
							wp = --cp;
							/* every path through this body ends in break, so it runs at most once */
							while ((c = mb2wc(w, cp, ep - cp)) <= 0)
							{
								/* mb char possibly spanning buffer boundary -- fun stuff */
								if ((ep - cp) < mbmax())
								{
									int	i;
									int	j;
									int	k;

									/* retry with the real last byte put back in place of the marker */
									if (lastchar != cut->eob)
									{
										*ep = lastchar;
										if ((c = mb2wc(w, cp, ep - cp)) > 0)
											break;
									}
									/* flush what is pending before the buffer is replaced */
									if (copy)
									{
										empty = 0;
										if ((c = cp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
											goto failed;
									}
									/* save the tail bytes, fetch the next buffer, and complete the character in mb[] */
									for (i = 0; i <= (ep - cp); i++)
										mb[i] = cp[i];
									if (!(bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) || (c = sfvalue(fdin)) <= 0)
										goto failed;
									cp = bp;
									ep = cp + --c;
									if ((lastchar = cp[c]) != cut->eob)
										*ep = cut->eob;
									j = i;
									k = 0;
									while (j < mbmax())
										mb[j++] = cp[k++];
									if ((c = mb2wc(w, (char*)mb, j)) <= 0)
									{
										c = i;
										w = 0;
									}
									/* step past the part of the character that lies in the new buffer */
									first = bp = cp += c - i;
									if (copy)
									{
										copy = bp;
										if (w == cut->ldelim.chr)
											lastchar = cut->ldelim.chr;
										else if (w != cut->wdelim.chr)
										{
											empty = 0;
											if (sfwrite(fdout, (char*)mb, c) < 0)
												goto failed;
										}
									}
									c = 0;
								}
								else
								{
									/* not decodable and not at the buffer end: treat the byte as one character */
									w = *cp;
									c = 1;
								}
								break;
							}
							cp += c;
							c = w;
							if (c == cut->wdelim.chr)
							{
								c = SP_WORD;
								break;
							}
							if (c == cut->ldelim.chr)
							{
								c = SP_LINE;
								break;
							}
							continue;
						default:
							wp = cp - 1;
							break;
						}
						break;
					}
				else
				{
					/* single-byte scan: stops at the first byte with a non-zero class */
					while (!(c = sp[*cp++]));
					wp = cp - 1;
				}
				/* check for end-of-line */
				if (c == SP_LINE)
				{
					/* still inside the buffer, or the buffer really ended with the line delimiter */
					if (cp <= ep)
						break;
					if (lastchar == cut->ldelim.chr)
						break;
					/* restore cut->last character */
					if (lastchar != cut->eob)
						*ep = lastchar;
					/* the buffer ended before the line did */
					inword++;
					if (!sp[lastchar])
						break;
				}
				/* a delimiter was consumed: count it against the current run */
				nodelim = 0;	
				if (--nfields > 0)
					continue;
				/* run exhausted: fetch the next count and toggle copying */
				nfields = *++lp;
				if (copy)
				{
					empty = 0;
					if ((c = wp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
						goto failed;
					copy = 0;
				}
				else
					/* set to delimiter unless the first field */
					copy = empty ? cp : wp;
			} while (!inword);
			if (!inword)
			{
				if (!copy)
				{
					if (nodelim)
					{
						if (!cut->sflag)
						{
							/* line without delimiter: emit the part saved from earlier buffers, then the rest */
							if (offset)
							{
								sfseek(fdtmp,(Sfoff_t)0,SEEK_SET);
								sfmove(fdtmp,fdout,offset,-1);
							}
							copy = first;
						}
					}
					else
						sfputc(fdout,'\n');
				}
				/* line complete: discard anything saved in the temporary stream */
				if (offset)
					sfseek(fdtmp,offset=0,SEEK_SET);
			}
			if (copy && (c=cp-copy)>0 && (!nodelim || !cut->sflag) && sfwrite(fdout,(char*)copy,c)< 0)
				goto failed;
		}
		/* see whether to save in tmp file */
		if(inword && nodelim && !cut->sflag && (c=cp-first)>0)
		{
			/* copy line to tmpfile in case no fields */
			if(!fdtmp)
				fdtmp = sftmp(BLOCK);
			sfwrite(fdtmp,(char*)first,c);
			offset +=c;
		}
	}
 failed:
	if(fdtmp)
		sfclose(fdtmp);
}
// Returns a newly created String holding containerPath converted by mb2wc().
String * LibraryItem::getContainer()
{
	return new String(mb2wc(containerPath.c_str()));
}
// Returns a newly created String holding title converted by mb2wc().
String * LibraryItem::getTitle()
{
	return new String(mb2wc(title.c_str()));
}
// Returns a newly created String holding localPath converted by mb2wc().
String * LibraryItem::getFilename()
{
	return new String(mb2wc(localPath.c_str()));
}
// Returns a newly created String holding album converted by mb2wc().
String * LibraryItem::getAlbum()
{
	return new String(mb2wc(album.c_str()));
}
// Returns a newly created String holding artist converted by mb2wc().
String * LibraryItem::getArtist()
{
	return new String(mb2wc(artist.c_str()));
}
Exemple #10
0
/*
 * Count the lines, words and characters of stream fd and store the results
 * in wp->lines, wp->words and wp->chars; wp->longest is reset to 0 and then
 * set by the first and third strategy below.  Always returns 0.
 *
 * file is only passed on to invalid() when an invalid multibyte sequence is
 * reported (suppressed when WC_QUIET is set in wp->mode).
 *
 * One of three strategies is chosen from wp->mb and wp->mode:
 *   1. wp->mb < 0 with WC_MBYTE or WC_WORDS: every character is decoded
 *      with mb2wc()/mbchar() and classified with iswspace().
 *   2. no multibyte work needed (see the condition on the second branch):
 *      byte scanning, classified through the wp->type table.
 *   3. otherwise: byte scanning through wp->type with explicit tracking of
 *      multibyte sequences and, for WC_LONGEST, of line lengths.
 *
 * Strategies 2 and 3 overwrite the last byte of each reserved buffer with
 * '\n' so that the inner loops need no separate end-of-buffer test.
 */
int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
{
	register char*		type = wp->type;
	register unsigned char*	cp;
	register Sfoff_t	nbytes;
	register Sfoff_t	nchars;
	register Sfoff_t	nwords;
	register Sfoff_t	nlines;
	register Sfoff_t	eline = -1;
	register Sfoff_t	longest = 0;
	register ssize_t	c;
	register unsigned char*	endbuff;
	register int		lasttype = WC_SP;
	unsigned int		lastchar;
	ssize_t			n;
	ssize_t			o;
	unsigned char*		buff;
	wchar_t			x;
	unsigned char		side[32];

	/* NOTE(review): presumably marks the stream writable because the buffers are patched in place below -- confirm */
	sfset(fd,SF_WRITE,1);
	nlines = nwords = nchars = nbytes = 0;
	wp->longest = 0;
	if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
	{
		/* strategy 1: decode one character per iteration */
		cp = buff = endbuff = 0;
		for (;;)
		{
			if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0)
			{
				/* buffer exhausted, or the bytes at cp do not decode */
				if ((o = endbuff-cp) < sizeof(side))
				{
					/* near the buffer end: join the o leftover bytes with the start of the next buffer in side[] */
					if (buff)
					{
						if (o)
							memcpy(side, cp, o);
						mbinit();
					}
					else
						o = 0;
					cp = side + o;
					if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
					{
						/* end of input: account for the final line length */
						if ((nchars - longest) > wp->longest)
							wp->longest = nchars - longest;
						break;
					}
					nbytes += n;
					if ((c = sizeof(side) - o) > n)
						c = n;
					if (c)
						memcpy(cp, buff, c);
					endbuff = buff + n;
					cp = side;
					x = mbchar(cp);
					/* map the position reached in side[] back into the new buffer */
					if ((cp-side) < o)
					{
						cp = buff;
						nchars += (cp-side) - 1;
					}
					else
						cp = buff + (cp-side) - o;
				}
				else
				{
					/* undecodable byte well inside the buffer: skip it and flag it */
					cp++;
					x = -1;
				}
				/* report at most once per line; presumably invalid() returns the line number -- confirm */
				if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
					eline = invalid(file, nlines);
			}
			else
				cp += n ? n : 1;
			if (x == '\n')
			{
				if ((nchars - longest) > wp->longest)
					wp->longest = nchars - longest;
				/* here `longest` holds the character offset at which the next line starts */
				longest = nchars + 1;
				nlines++;
				lasttype = 1;
			}
			else if (iswspace(x))
				lasttype = 1;
			else if (lasttype)
			{
				/* first non-space character after white space starts a word */
				lasttype = 0;
				nwords++;
			}
			nchars++;
		}
		if (!(wp->mode & WC_MBYTE))
			nchars = nbytes;
	}
	else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
	{
		/* strategy 2: plain byte scanning */
		if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
		{
			/* only lines and bytes are needed */
			while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
			{
				nchars += c;
				endbuff = cp + c;
				/* make the last byte a newline sentinel, counting it first if it was a real newline */
				if (*--endbuff == '\n')
					nlines++;
				else
					*endbuff = '\n';
				for (;;)
					if (*cp++ == '\n')
					{
						if (cp > endbuff)
							break;
						nlines++;
					}
			}
		}
		else
		{
			while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
			{
				nchars += c;
				/* check to see whether first character terminates word */
				if (c==1)
				{
					if (eol(lasttype))
						nlines++;
					if ((c = type[*cp]) && !lasttype)
						nwords++;
					lasttype = c;
					continue;
				}
				if (!lasttype && type[*cp])
					nwords++;
				/* save the real last byte and replace it with a newline sentinel */
				lastchar = cp[--c];
				*(endbuff = cp+c) = '\n';
				c = lasttype;
				/* process each buffer */
				for (;;)
				{
					/* process spaces and new-lines */
					do
					{
						if (eol(c))
							for (;;)
							{
								/* check for end of buffer */
								if (cp > endbuff)
									goto beob;
								nlines++;
								if (*cp != '\n')
									break;
								cp++;
							}
					} while (c = type[*cp++]);
					/* skip over word characters */
					while (!(c = type[*cp++]));
					nwords++;
				}
			beob:
				/* class of the byte just before the sentinel, or the carried-over class if there is none */
				if ((cp -= 2) >= buff)
					c = type[*cp];
				else
					c = lasttype;
				lasttype = type[lastchar];
				/* see if was in word */
				if (!c && !lasttype)
					nwords--;
			}
			/* account for the class of the very last byte of the input */
			if (eol(lasttype))
				nlines++;
			else if (!lasttype)
				nwords++;
		}
	}
	else
	{
		/* strategy 3: byte scanning with multibyte sequence tracking */
		int		lineoff=0;
		int		skip=0;
		int		adjust=0;
		int		state=0;
		int		oldc;
		int		xspace;
		int		wasspace = 1;
		unsigned char*	start;

		lastchar = 0;
		start = (endbuff = side) + 1;
		xspace = iswspace(0xa0) || iswspace(0x85);
		while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
		{
			nbytes += c;
			nchars += c;
			/* virtual start of the current line: lineoff bytes of it were in earlier buffers */
			start = cp-lineoff;
			/* check to see whether first character terminates word */
			if(c==1)
			{
				if(eol(lasttype))
					nlines++;
				if((c = type[*cp]) && !lasttype)
					nwords++;
				lasttype = c;
				endbuff = start;
				continue;
			}
			/* save the real last byte and replace it with a newline sentinel */
			lastchar = cp[--c];
			endbuff = cp+c;
			cp[c] = '\n';
			/* previous buffer ended inside a multibyte sequence: resume there */
			if(mbc(lasttype))
			{
				c = lasttype;
				goto mbyte;
			}
			if(!lasttype && spc(type[*cp]))
				nwords++;
			c = lasttype;
			/* process each buffer */
			for (;;)
			{
				/* process spaces and new-lines */
			spaces:
				do
				{
					if (eol(c))
					{
						/* check for end of buffer */
						if (cp > endbuff)
							goto eob;
						if(wp->mode&WC_LONGEST)
						{
							if((cp-start)-adjust > longest)
								longest = (cp-start)-adjust-1;
							start = cp;
						}
						nlines++;
						/* adjust accumulates bytes that are not separate characters on this line */
						nchars -= adjust;
						adjust = 0;
					}
				} while (spc(c = type[*cp++]));
				wasspace=1;
				if(mbc(c))
				{
				mbyte:
					do
					{
						if(c&WC_ERR)
							goto err;
						/* low three bits of the class: non-zero on a lead byte, zero on a continuation byte (per the tests below) */
						if(skip && (c&7))
							break;
						if(!skip)
						{
							if(!(c&7))
							{
								skip=1;
								break;
							}
							skip = (c&7);
							adjust += skip;
							state = 0;
							/* arm chkstate() for sequences that may encode a space character */
							if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
								oldc = *cp;
							else if(xspace && cp[-1]==0xc2)
							{
								state = 8;
								oldc = *cp;
							}
						}
						else
						{
							skip--;
							if(state && (state=chkstate(state,oldc)))
							{
								/* state 10: the sequence is treated as white space */
								if(state==10)
								{
									if(!wasspace)
										nwords++;
									wasspace = 1;
									state=0;
									goto spaces;
								}
								oldc = *cp;
							}
						}
					} while (mbc(c = type[*cp++]));
					wasspace = 0;
					if(skip)
					{
						/* sequence ended with continuation bytes still expected */
						if(eol(c) && (cp > endbuff))
							goto eob;
				err:
						skip = 0;
						state = 0;
						if(eline!=nlines && !(wp->mode & WC_QUIET))
							eline = invalid(file, nlines);
						/* NOTE(review): (c|WC_ERR) is non-zero whenever WC_ERR is non-zero; (c&WC_ERR) may have been intended -- confirm */
						while(mbc(c) && ((c|WC_ERR) || (c&7)==0)) 
							c=type[*cp++];
						if(eol(c) && (cp > endbuff))
						{
							c = WC_MB|WC_ERR;
							goto eob;
						}
						if(mbc(c))
							goto mbyte;
						else if(c&WC_SP)
							goto spaces;
					}
					if(spc(c))
					{
						nwords++;
						continue;
					}
				}
				/* skip over word characters */
				while(!(c = type[*cp++]));
				if(mbc(c))
					goto mbyte;
				nwords++;
			}
		eob:
			/* remember how much of the current line has been seen so far */
			lineoff = cp-start;
			if((cp -= 2) >= buff)
				c = type[*cp];
			else
				c = lasttype;
			lasttype = type[lastchar];
			/* see if was in word */
			if(!c && !lasttype)
				nwords--;
		}
		/* length of the final line, not counting a trailing newline */
		if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
			longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
		wp->longest = longest;
		if (eol(lasttype))
			nlines++;
		else if (!lasttype)
			nwords++;
		if (wp->mode & WC_MBYTE)
			nchars -= adjust;
		else
			nchars = nbytes;
	}
	wp->chars = nchars;
	wp->words = nwords;
	wp->lines = nlines;
	return 0;
}
Exemple #11
0
/*
 * Convert a string from code page cpFrom to code page cpTo.
 *
 * str     input string; NULL yields NULL.
 * strLen  input length in elements (wide characters for code pages
 *         1200..1203, bytes otherwise); a negative value means "measure it"
 *         with wcslen()/strlen().
 * len     optional; receives the length of the result.
 *
 * The input is first brought to a UTF-16 LE wide string (stripping a BOM,
 * swapping byte order, or calling mb2wc()), then converted to the target
 * (adding a BOM, swapping byte order, or calling wc2mb()).
 *
 * Ownership of the result, as implemented here:
 *   - cpFrom == cpTo: str itself is returned, nothing is allocated.
 *   - CP_UTF16_LE_BOM -> CP_UTF16_LE: the result points INTO str (just past
 *     the BOM) and a terminator is written at [strLen] of that view.
 *   - every other path returns a buffer obtained from malloc(), wcsdup(),
 *     mb2wc() or wc2mb().
 * Returns NULL when an allocation or conversion fails, or when the input is
 * too short to contain the BOM its code page implies.
 */
const void* __declspec(dllexport) strncvt (const void* str, int strLen, unsigned int cpFrom, unsigned int cpTo, int* len) {
    const void* from = NULL;
    const void* result = NULL;
    BOOL alloced = FALSE;   /* TRUE while `from` is a heap buffer owned by this function */

    if (!str) return NULL;
    if (strLen<0) strLen = (int)(cpFrom>=1200&&cpFrom<=1203? wcslen(str) : strlen(str));
    if (cpFrom==cpTo) {
        if (len) *len = strLen;
        return str;
    }

    /* Stage 1: obtain a UTF-16 LE view or copy of the input in `from`. */
    switch(cpFrom){
    case CP_UTF16_LE:
        from = str;
        if (len) *len = strLen;
        break;
    case CP_UTF16_LE_BOM:
        if (strLen<1) return NULL;          /* no room for the BOM being skipped */
        from = ((const wchar_t*)str) +1;
        strLen--;
        if (len) *len = strLen;
        break;
    case CP_UTF16_BE_BOM:
        if (strLen<1) return NULL;          /* no room for the BOM being skipped */
        from = ((const wchar_t*)str) +1;
        strLen--;
        /* fallthrough */
    case CP_UTF16_BE: {
        wchar_t* swapped = wcsdup(from? (const wchar_t*)from : (const wchar_t*)str);
        if (!swapped) return NULL;
        unicodeSwitchEndianess(swapped, 1+wcslen(swapped));
        from = swapped;
        if (len) *len = strLen;
        alloced = TRUE;
        break;
    }
    case CP_UTF8_BOM:
        if (strLen<3) return NULL;          /* no room for the 3-byte BOM being skipped */
        from = ((const char*)str) +3;
        cpFrom = CP_UTF8;
        strLen -= 3;
        /* fallthrough */
    default:
        if (!from) from = str;
        from = mb2wc(from, strLen, cpFrom, &strLen);
        if (len) *len = strLen;
        alloced = TRUE;
        break;
    }
    if (!from) return NULL;

    /* Stage 2: produce the target encoding from the wide string. */
    switch(cpTo){
    case CP_UTF16_LE:
        /* `from` becomes the result, so it must not be freed below.
           NOTE(review): when `from` is not a private copy this writes into
           the caller's buffer; kept as in the original. */
        result = from;
        ((wchar_t*)result)[strLen]=0;
        alloced = FALSE;
        break;
    case CP_UTF16_LE_BOM:
    case CP_UTF16_BE_BOM: {
        /* Always build the BOM-prefixed string in a fresh buffer: shifting
           in place would need one element more than a wcsdup() copy has.
           `alloced` is left untouched so an owned `from` is freed below. */
        wchar_t* out = malloc(sizeof(wchar_t) * ((size_t)strLen + 2));
        if (out) {
            out[0] = 0xFEFF;
            memcpy(out+1, from, sizeof(wchar_t) * (size_t)strLen);
            out[++strLen] = 0;
            if (cpTo==CP_UTF16_BE_BOM) unicodeSwitchEndianess(out, 1+strLen);
            if (len) *len = strLen;
        }
        result = out;
        break;
    }
    case CP_UTF16_BE: {
        wchar_t* out = (alloced? (wchar_t*)from : wcsdup((const wchar_t*)from));
        alloced = FALSE;                    /* an owned `from` is now the result */
        if (out) {
            size_t n = wcslen(out);
            /* swap only the characters plus terminator that the buffer holds */
            unicodeSwitchEndianess(out, 1+n);
            if ((size_t)strLen < n) out[strLen] = 0;
            if (len) *len = strLen;
        }
        result = out;
        break;
    }
    case CP_UTF8_BOM: {
        char* out = (char*)wc2mb(from, strLen, CP_UTF8, &strLen);
        if (out) {
            /* NOTE(review): relies on wc2mb() leaving at least 3 spare bytes
               in its buffer for the BOM -- confirm against wc2mb(). */
            memmove(out+3, out, 1+strLen);
            memcpy(out, "\xEF\xBB\xBF", 3);
            if (len) (*len) = strLen+3;
        }
        result = out;
        break;
    }
    default:
        result = wc2mb(from, strLen, cpTo, &strLen);
        if (len) *len = strLen;
        break;
    }
    if (alloced) free((void*)from);
    return result;
}
/*
 * Convert str from the given encoding with mb2wc() and return the result.
 * The result is kept in strptr; the buffer kept there by the previous call
 * is released first, so a returned pointer is valid only until the next
 * call that passes a non-NULL str.  A NULL str returns NULL and leaves
 * strptr untouched.
 */
const wchar_t* fromEncoding (const char* str, int encoding) {
    if (str == NULL) {
        return NULL;
    }
    free(strptr);   /* free(NULL) is a no-op, so no guard is needed */
    strptr = mb2wc(str, encoding);
    return strptr;
}