/*
 * Convert a string object into a freshly allocated, NUL-terminated UTF-8
 * string.
 *
 * Input starting with the bytes 0xFE 0xFF is read as big-endian 16-bit
 * units, input starting with 0xFF 0xFE as little-endian 16-bit units, and
 * anything else is mapped byte by byte through pdf_doc_encoding.  A trailing
 * odd byte in 16-bit input is ignored.
 *
 * The result comes from fz_malloc; the caller owns it.
 */
char *
pdf_to_utf8(fz_obj *src)
{
	enum { DOC_ENCODING, UTF16_BE, UTF16_LE };

	unsigned char *in = (unsigned char *) fz_to_str_buf(src);
	int inlen = fz_to_str_len(src);
	int kind = DOC_ENCODING;
	int start = 0;	/* offset of the first unit (skips the BOM) */
	int unit = 1;	/* bytes consumed per decoded character */
	int outlen = 0;
	char *out, *wp;
	int rune;
	int pos;

	if (inlen >= 2)
	{
		if (in[0] == 0xFE && in[1] == 0xFF)
			kind = UTF16_BE;
		else if (in[0] == 0xFF && in[1] == 0xFE)
			kind = UTF16_LE;
	}
	if (kind != DOC_ENCODING)
	{
		start = 2;
		unit = 2;
	}

	/* First pass: measure the UTF-8 output. */
	for (pos = start; pos + unit <= inlen; pos += unit)
	{
		if (kind == UTF16_BE)
			rune = in[pos] << 8 | in[pos + 1];
		else if (kind == UTF16_LE)
			rune = in[pos] | in[pos + 1] << 8;
		else
			rune = pdf_doc_encoding[in[pos]];
		outlen += runelen(rune);
	}

	out = wp = fz_malloc(outlen + 1);

	/* Second pass: same decoding, this time emitting bytes. */
	for (pos = start; pos + unit <= inlen; pos += unit)
	{
		if (kind == UTF16_BE)
			rune = in[pos] << 8 | in[pos + 1];
		else if (kind == UTF16_LE)
			rune = in[pos] | in[pos + 1] << 8;
		else
			rune = pdf_doc_encoding[in[pos]];
		wp += runetochar(wp, &rune);
	}

	*wp = '\0';
	return out;
}
/*
 * Convert a string object to a newly allocated, NUL-terminated UTF-8 string.
 *
 * A string beginning with the byte pair 0xFE 0xFF is decoded as big-endian
 * 16-bit units (the two marker bytes are skipped); any other string is
 * mapped byte by byte through the pdf_docencoding table.
 *
 * dstp: receives the fz_malloc'ed result (set to the null result of
 *       fz_malloc when allocation fails).
 * src:  the string object to convert.
 *
 * Returns nil on success, fz_outofmem if the buffer cannot be allocated.
 */
fz_error *
pdf_toutf8(char **dstp, fz_obj *src)
{
	unsigned char *srcptr = fz_tostrbuf(src);
	char *dstptr;
	int srclen = fz_tostrlen(src);
	int dstlen = 0;
	int ucs;
	int i;

	/*
	 * ">= 2" so that a string consisting of the marker alone converts to
	 * the empty string instead of being pushed through pdf_docencoding.
	 */
	if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
	{
		/*
		 * "i + 1 < srclen": never read srcptr[i+1] past the end of the
		 * buffer when the byte count is odd; the stray byte is dropped.
		 */
		for (i = 2; i + 1 < srclen; i += 2)
		{
			ucs = (srcptr[i] << 8) | srcptr[i+1];
			dstlen += runelen(ucs);
		}

		dstptr = *dstp = fz_malloc(dstlen + 1);
		if (!dstptr)
			return fz_outofmem;

		for (i = 2; i + 1 < srclen; i += 2)
		{
			ucs = (srcptr[i] << 8) | srcptr[i+1];
			dstptr += runetochar(dstptr, &ucs);
		}
	}
	else
	{
		for (i = 0; i < srclen; i++)
			dstlen += runelen(pdf_docencoding[srcptr[i]]);

		dstptr = *dstp = fz_malloc(dstlen + 1);
		if (!dstptr)
			return fz_outofmem;

		for (i = 0; i < srclen; i++)
		{
			ucs = pdf_docencoding[srcptr[i]];
			dstptr += runetochar(dstptr, &ucs);
		}
	}

	*dstptr = '\0';
	return nil;
}
/*
 * Duplicate the string found at *bdatap as UTF-8.
 *
 * When h is non-nil and has SMB_FLAGS2_UNICODE set, the string is read as
 * 2-byte units (via smbnhgets) up to and including a zero unit, after
 * skipping one pad byte if the position is at an odd offset from base.
 * Otherwise the work is handed to smbstrdup.
 *
 * On success *bdatap is advanced past the terminator and the smbemalloc'ed
 * copy is returned.  Returns nil, leaving *bdatap untouched, if the 2-byte
 * string would run past edata.
 */
char *
smbstringdup(SmbHeader *h, uchar *base, uchar **bdatap, uchar *edata)
{
	uchar *first, *cur;
	char *out, *wp;
	int nbytes;
	Rune rune;

	if (h == nil || (h->flags2 & SMB_FLAGS2_UNICODE) == 0)
		return smbstrdup(bdatap, edata);

	cur = *bdatap;
	if ((cur - base) & 1)
		cur++;
	first = cur;

	/* Measure; the terminating zero rune contributes the NUL byte. */
	nbytes = 0;
	for (;;) {
		if (cur + 2 > edata)
			return nil;
		rune = smbnhgets(cur);
		cur += 2;
		nbytes += runelen(rune);
		if (rune == 0)
			break;
	}

	/* Convert; bounds were already validated by the measuring pass. */
	out = smbemalloc(nbytes);
	wp = out;
	cur = first;
	for (;;) {
		rune = smbnhgets(cur);
		cur += 2;
		wp += runetochar(wp, &rune);
		if (rune == 0)
			break;
	}

	*bdatap = cur;
	return out;
}
/*
 * Append rune c, encoded as UTF-8, to the lexer text buffer J->lexbuf.
 * If the encoded rune would not fit, the capacity is doubled and the
 * buffer reallocated with js_realloc before writing.
 */
static void textpush(js_State *J, Rune c)
{
	int need = runelen(c);
	int written;

	if (J->lexbuf.cap < J->lexbuf.len + need) {
		J->lexbuf.cap *= 2;
		J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap);
	}

	written = runetochar(J->lexbuf.text + J->lexbuf.len, &c);
	J->lexbuf.len += written;
}
/*
 * Number of bytes needed to hold the zero-terminated wide string ws
 * once each element is encoded (as sized by runelen), including one
 * byte for the terminator.
 */
int
widebytes(wchar_t *ws)
{
	int total = 0;

	for (; *ws; ws++)
		total += runelen(*ws);
	return total + 1;
}
/*
 * Sum of runelen() over the zero-terminated rune string s.
 * The terminator itself is not counted.
 */
int
wstrutflen(Rune *s)
{
	int total;

	total = 0;
	while(*s){
		total += runelen(*s);
		s++;
	}
	return total;
}
/*
 * Sum of runelen() over the first len runes of p.
 * Returns 0 when len is 0 (p is then never dereferenced).
 */
size_t
runenlen(const Rune *p, size_t len)
{
	size_t total = 0;

	while(len-- > 0)
		total += runelen(*p++);
	return total;
}
/*
 * Append rune r, encoded with runetochar, at buf->current and advance
 * buf->current past it.  growbuf is called first when the encoded rune
 * would extend beyond buf->end.
 */
void
rinsert(Bufblock *buf, Rune r)
{
	int width = runelen(r);

	if (buf->end < buf->current + width)
		growbuf(buf);

	runetochar(buf->current, &r);
	buf->current += width;
}
/*
 * Read a string of 2-byte units from the unread part of b (b->rn up to
 * b->wn) and return it in *sp as a smbemalloc'ed UTF-8 string.
 *
 * flags:
 *   SMB_STRING_UNALIGNED     - when clear, skip one byte if the read
 *                              position is at an odd offset in b->buf.
 *   SMB_STRING_PATH          - if the first unit is not '\\', a '/' is
 *                              written before it.
 *   SMB_STRING_CONVERT_MASK  - when any of these bits is set each unit is
 *                              passed through smbruneconvert before it is
 *                              encoded (and before the end test).
 *
 * Reading stops after a unit that is zero (post-conversion), or when fewer
 * than two bytes remain, in which case a NUL is supplied.  b->rn is
 * advanced past what was consumed.  Always returns 1.
 */
int
smbbuffergetucs2(SmbBuffer *b, ulong flags, char **sp)
{
	uchar *start, *cur, *limit;
	char *out, *wp;
	int ispath, convert, atfirst, nbytes;
	Rune rune;

	cur = b->buf + b->rn;
	limit = b->buf + b->wn;
	ispath = (flags & SMB_STRING_PATH) != 0;
	convert = (flags & SMB_STRING_CONVERT_MASK) != 0;

	if ((flags & SMB_STRING_UNALIGNED) == 0 && ((cur - b->buf) & 1))
		cur++;
	start = cur;

	/* Pass 1: count output bytes, mirroring exactly what pass 2 writes. */
	nbytes = 0;
	atfirst = 1;
	for (;;) {
		if (cur + 2 > limit) {
			nbytes++;	/* room for the NUL we supply ourselves */
			break;
		}
		rune = smbnhgets(cur);
		cur += 2;
		if (atfirst && ispath && rune != '\\')
			nbytes++;	/* room for the inserted '/' */
		atfirst = 0;
		if (convert)
			rune = smbruneconvert(rune, flags);
		nbytes += runelen(rune);
		if (rune == 0)
			break;
	}

	/* Pass 2: produce the UTF-8 text. */
	out = smbemalloc(nbytes);
	wp = out;
	cur = start;
	atfirst = 1;
	for (;;) {
		if (cur + 2 > limit) {
			*wp = 0;
			break;
		}
		rune = smbnhgets(cur);
		cur += 2;
		if (atfirst && ispath && rune != '\\')
			*wp++ = '/';
		atfirst = 0;
		if (convert)
			rune = smbruneconvert(rune, flags);
		wp += runetochar(wp, &rune);
		if (rune == 0)
			break;
	}

	b->rn = cur - b->buf;
	*sp = out;
	return 1;
}
/*
 * Decode at most len bytes of s into a single rune stored in *p.
 *
 * The sequence length is taken from lookup[], indexed by the lead byte
 * divided by two: 0 marks an invalid lead byte, 1 a single-byte rune,
 * larger values the total number of bytes expected.
 *
 * Returns the number of bytes consumed.  Returns 0, without writing *p,
 * when len is 0 or when the sequence is cut short by len.  Invalid input
 * stores Runeerror: an invalid lead byte consumes 1 byte, a bad
 * continuation byte consumes the bytes before it, and an overlong encoding
 * consumes the whole sequence.
 */
int
charntorune(Rune *p, const char *s, size_t len)
{
	unsigned int need, used;
	Rune r;

	if(len == 0) /* can't even look at s[0] */
		return 0;

	r = (unsigned char)s[0];
	need = lookup[r/2];
	if(need <= 1) {
		/* single byte: either a complete rune or an invalid lead */
		*p = (need == 0) ? Runeerror : r;
		return 1;
	}

	/* keep only the payload bits of the lead byte */
	r &= 0xFF >> need;
	if(len > need)
		len = need;

	/* fold in the continuation bytes (10xxxxxx), six bits apiece;
	 * cannot overflow: 6 byte sequences contain 31 bits */
	used = 1;
	while(used < len) {
		if((s[used] & 0xC0) != 0x80) {
			*p = Runeerror;
			return used;
		}
		r = (r << 6) | (s[used] & 0x3F);
		used++;
	}

	if(used < need) /* ran into the len limit first */
		return 0;

	/* a value that fits in fewer bytes was encoded overlong */
	*p = (runelen(r) < (int)need) ? Runeerror : r;
	return used;
}
/*
 * Emit rune c into the output window fp->out .. fp->eout.
 *
 * Runes below Runeself are stored as one byte.  Other runes are encoded
 * with runetochar when the remaining room is at least UTFmax or at least
 * runelen(c).  If the rune does not fit, fp->eout is pulled back to
 * fp->out so that no later call writes anything.  Does nothing when no
 * room is left.
 */
static void
pchar(Rune c, Fconv *fp)
{
	int room, w;

	room = fp->eout - fp->out;
	if(room <= 0)
		return;

	if(c < Runeself) {
		*fp->out++ = c;
		return;
	}

	if(room < UTFmax && room < runelen(c)) {
		/* does not fit: close the window */
		fp->eout = fp->out;
		return;
	}

	w = runetochar(fp->out, &c);
	fp->out += w;
}
/*
 * Encode the zero-terminated rune string t into s, which has room for
 * n bytes, always NUL-terminating what was written.
 *
 * If n <= 0 nothing is written and the byte count needed for all of t
 * (terminator included) is returned.  If the buffer fills before t is
 * exhausted, the output is terminated and the return value is the number
 * of bytes written plus the bytes still needed (terminator included).
 * Otherwise returns the number of bytes written, not counting the NUL.
 */
int
wstrtoutf(char *s, Rune *t, int n)
{
	char *start;
	int w;

	if(n <= 0)
		return wstrutflen(t)+1;

	start = s;
	for(; *t != 0; t++){
		/* stop unless this rune plus a NUL is certain to fit */
		if(n <= UTFmax && n <= runelen(*t)){
			*s = 0;
			return (s-start)+wstrutflen(t)+1;
		}
		w = runetochar(s, t);
		s += w;
		n -= w;
	}
	*s = 0;
	return s-start;
}
/*
 * Encode the zero-terminated Rune16 string r into p, a buffer of nc bytes,
 * stopping early if the next rune plus the terminating NUL would not fit.
 * The output is always NUL-terminated.  Returns the start of the buffer.
 */
char*
runes16toutf(char *p, Rune16 *r, int nc)
{
	char *start, *limit;
	int c, width;
	Rune rune;

	start = p;
	limit = p + nc;
	for(; (c = *r) != 0; r++){
		if(c < Runeself){
			/* single byte; keep one byte back for the NUL */
			if(p + 1 >= limit)
				break;
			*p++ = c;
			continue;
		}
		width = runelen(c);
		if(p + width >= limit)
			break;
		rune = c;
		p += runetochar(p, &rune);
	}
	*p = '\0';
	return start;
}
/*
 * How many bytes of output UTF will be produced by quoting (if necessary)
 * this string?  How many runes?  How much of the input will be consumed?
 *
 * The answers are stored in *q:
 *	q->quoted	set to 1 if the output must be wrapped in quotes
 *	q->nbytesout	bytes of output, including any quotes
 *	q->nrunesout	runes of output, including any quotes
 *	q->nbytesin	bytes of input accepted
 *	q->nrunesin	runes of input accepted
 *
 * The string may be UTF or Runes (s or r); s is used when it is non-nil.
 * Output counts do not include a terminating NUL.
 * nout is a limit in runes when runesout is set, otherwise in bytes.
 * sharp forces quoting even if no character requires it.
 *
 * Terminate the scan at the first of:
 *	NUL in input
 *	count exceeded in input
 *	count exceeded on output
 * nin may be <0 initially, to avoid checking input by count.
 */
void
__quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, int sharp, int runesout)
{
	int w;
	Rune c;

	q->quoted = 0;
	q->nbytesout = 0;
	q->nrunesout = 0;
	q->nbytesin = 0;
	q->nrunesin = 0;
	/* forced quoting, or an empty string: the output is at least '' */
	if(sharp || nin==0 || (s && *s=='\0') || (r && *r=='\0')){
		if(nout < 2)
			return;
		q->quoted = 1;
		q->nbytesout = 2;
		q->nrunesout = 2;
	}
	for(; nin!=0; nin--){
		/* fetch the next character c and its UTF width w */
		if(s)
			w = chartorune(&c, s);
		else{
			c = *r;
			w = runelen(c);
		}

		if(c == '\0')
			break;
		/* stop if the bare character does not fit */
		if(runesout){
			if(q->nrunesout+1 > nout)
				break;
		}else{
			if(q->nbytesout+w > nout)
				break;
		}

		/* space and control characters, a quote, or anything fmtdoquote flags */
		if((c <= L' ') || (c == L'\'') || (fmtdoquote!=nil && fmtdoquote((int)c))){
			if(!q->quoted){
				/* first such character: account for both enclosing quotes */
				if(runesout){
					if(1+q->nrunesout+1+1 > nout)	/* no room for quotes */
						break;
				}else{
					if(1+q->nbytesout+w+1 > nout)	/* no room for quotes */
						break;
				}
				q->nrunesout += 2;	/* include quotes */
				q->nbytesout += 2;	/* include quotes */
				q->quoted = 1;
			}
			if(c == '\'')	{
				if(runesout){
					if(1+q->nrunesout+1 > nout)	/* no room for quotes */
						break;
				}else{
					if(1+q->nbytesout+w > nout)	/* no room for quotes */
						break;
				}
				q->nbytesout++;
				q->nrunesout++;	/* quotes reproduce as two characters */
			}
		}

		/* advance input */
		if(s)
			s += w;
		else
			r++;
		q->nbytesin += w;
		q->nrunesin++;

		/* advance output */
		q->nbytesout += w;
		q->nrunesout++;

#ifndef PLAN9PORT
		/* ANSI requires precision in bytes, not Runes. */
		nin-= w-1;	/* and then n-- in the loop */
#endif
	}
}
char * tcs(char *cs, char *s, long *np) { Channel *sync; Exec *e; Rune r; long i, n; void **a; uchar *us; char buf[BUFSIZE], cmd[50]; char *t, *u; int p[2], q[2]; if(s==nil || *s=='\0' || *np==0){ werrstr("tcs failed: no data"); return s; } if(cs == nil){ werrstr("tcs failed: no charset"); return s; } if(cistrncmp(cs, "utf-8", 5)==0 || cistrncmp(cs, "utf8", 4)==0) return s; for(i=0; tcstab[i].mime!=nil; i++) if(cistrncmp(cs, tcstab[i].mime, strlen(tcstab[i].mime)) == 0) break; if(tcstab[i].mime == nil){ fprint(2, "abaco: charset: %s not supported\n", cs); goto latin1; } if(cistrcmp(tcstab[i].tcs, "8859-1")==0 || cistrcmp(tcstab[i].tcs, "ascii")==0){ latin1: n = 0; for(us=(uchar*)s; *us; us++) n += runelen(*us); n++; t = emalloc(n); for(us=(uchar*)s, u=t; *us; us++){ if(*us>=Winstart && *us<=Winend) *u++ = winchars[*us-Winstart]; else{ r = *us; u += runetochar(u, &r); } } *u = 0; free(s); return t; } if(pipe(p)<0 || pipe(q)<0) error("can't create pipe"); sync = chancreate(sizeof(ulong), 0); if(sync == nil) error("can't create channel"); snprint(cmd, sizeof cmd, "tcs -f %s", tcstab[i].tcs); e = emalloc(sizeof(Exec)); e->p[0] = p[0]; e->p[1] = p[1]; e->q[0] = q[0]; e->q[1] = q[1]; e->cmd = cmd; e->sync = sync; proccreate(execproc, e, STACK); recvul(sync); chanfree(sync); close(p[0]); close(q[1]); /* in case tcs fails */ t = s; sync = chancreate(sizeof(ulong), 0); if(sync == nil) error("can't create channel"); a = emalloc(4*sizeof(void *)); a[0] = sync; a[1] = (void *)p[1]; a[2] = s; a[3] = (void *)*np; proccreate(writeproc, a, STACK); s = nil; while((n = read(q[0], buf, sizeof(buf))) > 0){ s = erealloc(s, i+n+1); memmove(s+i, buf, n); i += n; s[i] = '\0'; } n = recvul(sync); if(n != *np) fprint(2, "tcs: did not write %ld; wrote %uld\n", *np, n); *np = i; chanfree(sync); close(q[0]); if(s == nil){ fprint(2, "tcs failed: can't convert charset=%s to %s\n", cs, tcstab[i].tcs); return t; } free(t); return s; }
/*
 * Pop up a one-line text entry box on the screen and run it until the
 * user finishes.
 *
 * ask: optional prompt drawn before the text (may be nil or empty).
 * buf: editable UTF-8 text, NUL-terminated on entry, with room for len
 *      bytes; if buf is nil or len <= 0 only the prompt is shown.
 * m:   if non-nil, the box is placed at m->xy, mouse events are read as
 *      well, and *m is updated with each mouse event.
 *
 * Returns the length in bytes of the text left in buf (0 when there is no
 * buffer), or -1 if an image cannot be allocated or an event other than
 * keyboard/mouse arrives.
 */
int
eenter(char *ask, char *buf, int len, Mouse *m)
{
	int done, down, tick, n, h, w, l, i;
	Image *b, *save, *backcol, *bordcol;
	Point p, o, t;
	Rectangle r, sc;
	Event ev;
	Rune k;

	o = screen->r.min;
	backcol = allocimagemix(display, DPurpleblue, DWhite);
	bordcol = allocimage(display, Rect(0,0,1,1), screen->chan, 1, DPurpleblue);
	if(backcol == nil || bordcol == nil){
		/* release whichever of the two did get allocated */
		freeimage(backcol);
		freeimage(bordcol);
		return -1;
	}

	/* discard any pending keystrokes */
	while(ecankbd())
		ekbd();

	if(m) o = m->xy;

	if(buf && len > 0)
		n = strlen(buf);
	else {
		buf = nil;
		len = 0;
		n = 0;
	}

	k = -1;
	tick = n;	/* cursor position, as a byte offset into buf */
	save = nil;
	done = down = 0;

	p = stringsize(font, " ");
	h = p.y;
	w = p.x;

	b = screen;
	sc = b->clipr;
	replclipr(b, 0, b->r);

	while(!done){
		/* size the box around prompt and text, then keep it on screen */
		p = stringsize(font, buf ? buf : "");
		if(ask && ask[0]){
			if(buf) p.x += w;
			p.x += stringwidth(font, ask);
		}
		r = rectaddpt(insetrect(Rpt(ZP, p), -4), o);
		p.x = 0;
		r = rectsubpt(r, p);

		p = ZP;
		if(r.min.x < screen->r.min.x)
			p.x = screen->r.min.x - r.min.x;
		if(r.min.y < screen->r.min.y)
			p.y = screen->r.min.y - r.min.y;
		r = rectaddpt(r, p);
		p = ZP;
		if(r.max.x > screen->r.max.x)
			p.x = r.max.x - screen->r.max.x;
		if(r.max.y > screen->r.max.y)
			p.y = r.max.y - screen->r.max.y;
		r = rectsubpt(r, p);

		r = insetrect(r, -2);
		if(save == nil){
			/* remember what is underneath so it can be restored */
			save = allocimage(display, r, b->chan, 0, DNofill);
			if(save == nil){
				n = -1;
				break;
			}
			draw(save, r, b, nil, r.min);
		}
		draw(b, r, backcol, nil, ZP);
		border(b, r, 2, bordcol, ZP);
		p = addpt(r.min, Pt(6, 6));
		if(ask && ask[0]){
			p = string(b, p, bordcol, ZP, font, ask);
			if(buf) p.x += w;
		}
		if(buf){
			t = p;	/* where the text starts; used for mouse hits */
			p = stringn(b, p, display->black, ZP, font, buf, utfnlen(buf, tick));
			/* the cursor: a vertical bar with a small serif at each end */
			draw(b, Rect(p.x-1, p.y, p.x+2, p.y+3), display->black, nil, ZP);
			draw(b, Rect(p.x, p.y, p.x+1, p.y+h), display->black, nil, ZP);
			draw(b, Rect(p.x-1, p.y+h-3, p.x+2, p.y+h), display->black, nil, ZP);
			p = string(b, p, display->black, ZP, font, buf+tick);
		}
		flushimage(display, 1);

nodraw:
		i = Ekeyboard;
		if(m != nil)
			i |= Emouse;

		replclipr(b, 0, sc);
		i = eread(i, &ev);

		/* screen might have been resized */
		if(b != screen || !eqrect(screen->clipr, sc)){
			freeimage(save);
			save = nil;
		}
		b = screen;
		sc = b->clipr;
		replclipr(b, 0, b->r);

		switch(i){
		default:
			done = 1;
			n = -1;
			break;
		case Ekeyboard:
			k = ev.kbdc;
			if(buf == nil || k == Keof || k == '\n'){
				done = 1;
				break;
			}
			if(k == Knack || k == Kesc){
				/* clear the text; on already empty text, finish */
				done = !n;
				buf[n = tick = 0] = 0;
				break;
			}
			if(k == Ksoh || k == Khome){
				tick = 0;
				continue;
			}
			if(k == Kenq || k == Kend){
				tick = n;
				continue;
			}
			if(k == Kright){
				if(tick < n)
					tick += chartorune(&k, buf+tick);
				continue;
			}
			if(k == Kleft){
				/*
				 * Walk rune by rune from the start of the text to
				 * find the rune that ends at the cursor.  Each step
				 * must decode the rune at i (not at tick), or the
				 * cursor can end up inside a multi-byte sequence.
				 */
				for(i = 0; i < n; i += l){
					l = chartorune(&k, buf+i);
					if(i+l >= tick){
						tick = i;
						break;
					}
				}
				continue;
			}
			if(k == Ketb){
				/* back up to just after the previous delimiter and cut there */
				while(tick > 0){
					tick--;
					if(tick == 0 ||
					   strchr(" !\"#$%&'()*+,-./:;<=>?@`[\\]^{|}~", buf[tick-1]))
						break;
				}
				buf[n = tick] = 0;
				break;
			}
			if(k == Kbs){
				if(tick <= 0)
					continue;
				/* find and remove the rune that ends at the cursor */
				for(i = 0; i < n; i += l){
					l = chartorune(&k, buf+i);
					if(i+l >= tick){
						memmove(buf+i, buf+i+l, n - (i+l));
						buf[n -= l] = 0;
						tick -= l;
						break;
					}
				}
				break;
			}
			/* ignore control, function and special keys */
			if(k < 0x20 || k == Kdel || (k & 0xFF00) == KF || (k & 0xFF00) == Spec)
				continue;
			/* ignore the key if rune plus NUL would not fit */
			if((len-n) <= (l = runelen(k)))
				continue;
			memmove(buf+tick+l, buf+tick, n - tick);
			runetochar(buf+tick, &k);
			buf[n += l] = 0;
			tick += l;
			break;
		case Emouse:
			*m = ev.mouse;
			if(!ptinrect(m->xy, r)){
				down = 0;
				goto nodraw;
			}
			if(m->buttons & 7){
				down = 1;
				if(buf && m->xy.x >= (t.x - w)){
					/* click in the text: move the cursor there */
					down = 0;
					for(i = 0; i < n; i += l){
						l = chartorune(&k, buf+i);
						t.x += stringnwidth(font, buf+i, 1);
						if(t.x > m->xy.x)
							break;
					}
					tick = i;
				}
				continue;
			}
			/* release inside the box after a press outside the text */
			done = down;
			break;
		}
		if(save){
			/* put back what the box covered */
			draw(b, save->r, save, nil, save->r.min);
			freeimage(save);
			save = nil;
		}
	}

	replclipr(b, 0, sc);

	freeimage(backcol);
	freeimage(bordcol);
	flushimage(display, 1);

	return n;
}
char* tcs(char *charset, char *s) { char *buf; int i, n, nbuf; int fd[3], p[2], pp[2]; uchar *us; char *t, *u; char *argv[4]; Rune r; Writeargs *w; if(s == nil || charset == nil || *s == 0) return s; if(cistrcmp(charset, "utf-8") == 0) return s; if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){ latin1: n = 0; for(us=(uchar*)s; *us; us++) n += runelen(*us); n++; t = emalloc(n); for(us=(uchar*)s, u=t; *us; us++){ r = *us; u += runetochar(u, &r); } *u = 0; free(s); return t; } for(i=0; i<nelem(tcstab); i++) if(cistrcmp(charset, tcstab[i].mime) == 0) goto tcs; goto latin1; tcs: argv[0] = "tcs"; argv[1] = "-f"; argv[2] = charset; argv[3] = nil; if(pipe(p) < 0 || pipe(pp) < 0) sysfatal("pipe: %r"); fd[0] = p[0]; fd[1] = pp[0]; fd[2] = dup(2, -1); if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){ close(p[0]); close(p[1]); close(pp[0]); close(pp[1]); close(fd[2]); goto latin1; } close(p[0]); close(pp[0]); nbuf = UTFmax*strlen(s)+100; /* just a guess at worst case */ buf = emalloc(nbuf); w = emalloc(sizeof *w); w->fd = p[1]; w->s = estrdup(s); proccreate(twriter, w, STACK); n = readn(pp[1], buf, nbuf-1); close(pp[1]); if(n <= 0){ free(buf); goto latin1; } buf[n] = 0; free(s); s = estrdup(buf); free(buf); return s; }
/*
 * Try to answer the request at the head of q with window text from the
 * rune range q0..q1.
 *
 * Nothing is sent until that text contains a newline or an EOT ('\004')
 * character; text after the first such character is not sent.  A newline
 * is included in the reply, an EOT is not.  If the reader's buffer
 * (ifcall.count) is too small for even the first character, that
 * character's bytes are parked in the static tmp/partial state and handed
 * out piecemeal by this and later calls.
 *
 * NOTE(review): q and eq are defined elsewhere in this file; from their
 * use here they look like the head and tail pointer of a list of pending
 * requests chained through r->aux - confirm.
 *
 * Returns the number of runes of window text consumed (counting a
 * consumed EOT), or 0 if no request is pending or no complete line is
 * available yet.
 */
static int
__sendinput(Window *w, uint32_t q0, uint32_t q1)
{
	char *s, *t;
	int n, nb, eofchar;
	static int partial;
	static char tmp[UTFmax];
	Req *r;
	Rune rune;

	if(!q)
		return 0;

	r = q;
	n = 0;
	if(partial){
	Partial:
		/* hand out as much of the parked character as the reader takes */
		nb = partial;
		if(nb > r->ifcall.count)
			nb = r->ifcall.count;
		memmove(r->ofcall.data, tmp, nb);
		if(nb!=partial)
			memmove(tmp, tmp+nb, partial-nb);
		partial -= nb;
		q = r->aux;
		if(q == nil)
			eq = &q;
		r->aux = nil;
		r->ofcall.count = nb;
		if(debug)
			fprint(2, "satisfy read with partial\n");
		respond(r, nil);
		return n;
	}
	if(q0==q1)
		return 0;
	s = emalloc((q1-q0)*UTFmax+1);
	n = winread(w, q0, q1, s);
	s[n] = '\0';
	t = strpbrk(s, "\n\004");
	if(t == nil){
		free(s);
		return 0;
	}
	r = q;
	eofchar = 0;
	if(*t == '\004'){
		eofchar = 1;
		*t = '\0';
	}else
		*++t = '\0';
	nb = utfncpy((char*)r->ofcall.data, s, r->ifcall.count);
	if(nb==0 && s<t && r->ifcall.count > 0){
		/* reader's buffer is smaller than the first character: park it */
		partial = utfncpy(tmp, s, UTFmax);
		assert(partial > 0);
		chartorune(&rune, tmp);
		partial = runelen(rune);
		free(s);
		n = 1;
		goto Partial;
	}
	n = utfnlen(r->ofcall.data, nb);
	if(nb==strlen(s) && eofchar)
		n++;
	/* the reply lives in r->ofcall.data; the scratch copy is done with */
	free(s);
	r->ofcall.count = nb;
	q = r->aux;
	if(q == nil)
		eq = &q;
	r->aux = nil;
	if(debug)
		fprint(2, "read returns %lud-%lud: %.*q\n", q0, q0+n, n, r->ofcall.data);
	respond(r, nil);
	return n;
}
/*
 * Decode at most len bytes of s into a single rune stored in *p.
 *
 * Returns the number of bytes consumed.  Returns 0, without writing *p,
 * when len is 0 or when the sequence is cut short by len.  Invalid input
 * stores Runeerror: an invalid lead byte consumes 1 byte, a bad
 * continuation byte consumes the bytes before it, and an overlong encoding
 * consumes the whole sequence.
 */
int
charntorune(Rune *p, const char *s, size_t len)
{
	unsigned int lead, pos, n;
	Rune r;

	if(len == 0) /* can't even look at s[0] */
		return 0;

	lead = (unsigned char)s[0];
	if(lead < 0x80) {                  /* 0xxxxxxx */
		*p = lead;
		return 1;
	}
	if(lead < 0xC0 || lead >= 0xFE) {  /* 10xxxxxx, 0xFE, 0xFF: not a lead byte */
		*p = Runeerror;
		return 1;
	}

	/* sequence length from the number of leading one bits */
	if(lead < 0xE0)
		n = 2;                     /* 110xxxxx */
	else if(lead < 0xF0)
		n = 3;                     /* 1110xxxx */
	else if(lead < 0xF8)
		n = 4;                     /* 11110xxx */
	else if(lead < 0xFC)
		n = 5;                     /* 111110xx */
	else
		n = 6;                     /* 1111110x */

	/* payload bits of the lead byte: 5, 4, 3, 2 or 1 of them */
	r = lead & (0x7F >> n);

	if(len > n)
		len = n;

	/* fold in the continuation bytes (10xxxxxx), six bits apiece;
	 * cannot overflow: 6 byte sequences contain 31 bits */
	for(pos = 1; pos < len; pos++) {
		if((s[pos] & 0xC0) != 0x80) {
			*p = Runeerror;
			return pos;
		}
		r = (r << 6) | (s[pos] & 0x3F);
	}

	if(pos < n) /* ran into the len limit first */
		return 0;

	/* a value that fits in fewer bytes was encoded overlong */
	if(runelen(r) < (int)n)
		r = Runeerror;
	*p = r;
	return n;
}