/*------------------------------------------------------------
 * Vport Getb
 */

/*
 * Reads one byte from the virtual port P.
 *
 * If the port has a get-byte procedure, it is called and its result is
 * masked to 8 bits; a non-fixnum result is reported as EOF.
 *
 * Otherwise the get-char procedure (if any) is called, the character is
 * encoded into bytes, the first byte is returned and the remaining bytes
 * are pushed back onto the port with Scm_UngetbUnsafe().
 *
 * Returns the byte as a non-negative int, or EOF.
 */
static int vport_getb(ScmPort *p)
{
    vport *data = (vport*)p->src.vt.data;
    SCM_ASSERT(data != NULL);

    if (SCM_FALSEP(data->getb_proc)) {
        /* If the port doesn't have get-byte method, use get-char
           if possible. */
        ScmObj ch;
        ScmChar c;
        /* The buffer must be unsigned: with a signed plain char, bytes
           >= 0x80 would be sign-extended to negative ints, so a lead
           byte of 0xff would be indistinguishable from EOF and negative
           values would be pushed back. */
        unsigned char buf[SCM_CHAR_MAX_BYTES];
        int nb, i;

        if (SCM_FALSEP(data->getc_proc)) return EOF;
        ch = Scm_ApplyRec(data->getc_proc, SCM_NIL);
        if (!SCM_CHARP(ch)) return EOF;

        c = SCM_CHAR_VALUE(ch);
        nb = SCM_CHAR_NBYTES(c);
        SCM_CHAR_PUT(buf, c);
        for (i=1; i<nb; i++) {
            /* pushback for later use.  this isn't very efficient;
               if efficiency becomes a problem, we need another API
               to pushback multiple bytes. */
            Scm_UngetbUnsafe(buf[i], p);
        }
        return buf[0];
    } else {
        ScmObj b = Scm_ApplyRec(data->getb_proc, SCM_NIL);
        if (!SCM_INTP(b)) return EOF;
        return (SCM_INT_VALUE(b) & 0xff);
    }
}
/*
 * Byte read when the port holds an ungotten character: the character is
 * encoded into the port's scratch buffer, the ungotten slot is cleared,
 * and the read is then served by getb_scratch().
 */
static int getb_ungotten(ScmPort *p)
{
    /* Record the encoded length, then emit the bytes into scratch. */
    p->scrcnt = SCM_CHAR_NBYTES(p->ungotten);
    SCM_CHAR_PUT(p->scratch, p->ungotten);

    /* The character now lives in scratch only. */
    p->ungotten = SCM_CHAR_INVALID;

    return getb_scratch(p);
}
/*
 * Converts a character in the native encoding to its UCS code.
 * Returns -1 if CH is SCM_CHAR_INVALID or cannot be converted.
 *
 * When the native encoding is UTF-8 the character code is returned as is.
 * Otherwise the character is passed through the ucsconv.char2ucs converter
 * (under ucsconv.mutex) and the converter's output is decoded as a UTF-8
 * style byte sequence of up to six bytes.
 */
static int chartoucs(ScmChar ch)
{
#if defined(GAUCHE_CHAR_ENCODING_UTF_8)
    return (ch == SCM_CHAR_INVALID) ? -1 : (int)ch;
#else  /*!GAUCHE_CHAR_ENCODING_UTF_8*/
    char inbuf[6], outbuf[6];
    const char *inb = inbuf;
    char *outb = outbuf;

    if (ch == SCM_CHAR_INVALID) return -1;
    if (ucsconv.char2ucs == NULL) return -1;

    size_t inroom = SCM_CHAR_NBYTES(ch);
    size_t outroom = 6;
    SCM_CHAR_PUT(inbuf, ch);

    (void)SCM_INTERNAL_MUTEX_LOCK(ucsconv.mutex);
    size_t r = jconv(ucsconv.char2ucs, &inb, &inroom, &outb, &outroom);
    (void)SCM_INTERNAL_MUTEX_UNLOCK(ucsconv.mutex);

    if (r == INPUT_NOT_ENOUGH || r == OUTPUT_NOT_ENOUGH) {
        Scm_Error("can't convert character %u to UCS4 code: implementation problem?", ch);
    }
    if (r == ILLEGAL_SEQUENCE) return -1;

    /* Classify the lead byte: how many trailing bytes follow and which
       of its bits carry payload. */
    const unsigned char *ucp = (const unsigned char*)outbuf;
    const unsigned char lead = ucp[0];
    int ntrail;
    int ucs;

    if (lead < 0x80) {
        return (int)lead;
    } else if (lead < 0xe0) {
        ntrail = 1; ucs = lead & 0x1f;
    } else if (lead < 0xf0) {
        ntrail = 2; ucs = lead & 0x0f;
    } else if (lead < 0xf8) {
        ntrail = 3; ucs = lead & 0x07;
    } else if (lead < 0xfc) {
        ntrail = 4; ucs = lead & 0x03;
    } else if (lead < 0xfe) {
        ntrail = 5; ucs = lead & 0x01;
    } else {
        return -1;
    }

    /* Each trailing byte contributes its low six bits. */
    for (int k = 1; k <= ntrail; k++) {
        ucs = (ucs << 6) + (ucp[k] & 0x3f);
    }
    return ucs;
#endif /*!GAUCHE_CHAR_ENCODING_UTF_8*/
}
/*
 * Number of bytes held back on port P: the bytes in the scratch buffer
 * plus, if a character has been ungotten, that character's encoded length.
 */
static off_t port_pending_bytes(ScmPort *p)
{
    off_t pending = p->scrcnt;

    if (p->ungotten == SCM_CHAR_INVALID) {
        return pending;
    }
    pending += SCM_CHAR_NBYTES(p->ungotten);
    return pending;
}
/*
 * Block read: hands BUF/BUFLEN to the reader appropriate for port P and
 * returns that reader's result.
 *
 * Bytes already held by the port take priority over the underlying
 * source: first the scratch buffer, then an ungotten character (which is
 * encoded into the scratch buffer and read from there).
 *
 * For file ports, a read of zero bytes is reported as EOF.  For an
 * unsupported port type a port error is raised.
 *
 * NOTE(review): the `#endif` after the signature closes a preprocessor
 * conditional that opens outside this view; presumably it selects
 * between a safe and an unsafe variant of this function - confirm.
 */
int Scm_GetzUnsafe(char *buf, int buflen, ScmPort *p)
#endif
{
    VMDECL;
    SHORTCUT(p, return Scm_GetzUnsafe(buf, buflen, p));
    LOCK(p);
    CLOSE_CHECK(p);

    /* Leftover bytes in the scratch buffer are consumed first. */
    if (p->scrcnt) {
        int r = GETZ_SCRATCH(buf, buflen, p);
        UNLOCK(p);
        return r;
    }
    /* An ungotten character is spilled into scratch and read from there. */
    if (p->ungotten != SCM_CHAR_INVALID) {
        p->scrcnt = SCM_CHAR_NBYTES(p->ungotten);
        SCM_CHAR_PUT(p->scratch, p->ungotten);
        p->ungotten = SCM_CHAR_INVALID;
        int r = GETZ_SCRATCH(buf, buflen, p);
        UNLOCK(p);
        return r;
    }

    switch (SCM_PORT_TYPE(p)) {
    case SCM_PORT_FILE: {
        int siz = 0;
        SAFE_CALL(p, siz = bufport_read(p, buf, buflen));
        p->bytes += siz;
        UNLOCK(p);
        /* Zero bytes read is reported to the caller as EOF. */
        if (siz == 0) return EOF;
        else return siz;
    }
    case SCM_PORT_ISTR: {
        int r = GETZ_ISTR(p, buf, buflen);
        /* NOTE(review): if GETZ_ISTR can yield EOF (negative), this
           would decrement p->bytes - confirm against the macro. */
        p->bytes += r;
        UNLOCK(p);
        return r;
    }
    case SCM_PORT_PROC: {
        int r = 0;
        SAFE_CALL(p, r = p->src.vt.Getz(buf, buflen, p));
        /* NOTE(review): same concern as above if Getz can return EOF. */
        p->bytes += r;
        UNLOCK(p);
        return r;
    }
    default:
        /* Release the lock before raising the error. */
        UNLOCK(p);
        Scm_PortError(p, SCM_PORT_ERROR_INPUT, "bad port type for input: %S", p);
    }
    return -1; /* dummy */
}
ScmObj read_predef_charset(const char **cp, int error_p) { int i; char name[MAX_CHARSET_NAME_LEN]; for (i=0; i<MAX_CHARSET_NAME_LEN; i++) { ScmChar ch; SCM_CHAR_GET(*cp, ch); if (ch == SCM_CHAR_INVALID) return SCM_FALSE; *cp += SCM_CHAR_NBYTES(ch); if (!SCM_CHAR_ASCII_P(ch)) break; if (ch != ']') { name[i] = (char)ch; continue; } if (strncmp(name, ":alnum:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_ALNUM); } else if (strncmp(name, ":alpha:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_ALPHA); } else if (strncmp(name, ":blank:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_BLANK); } else if (strncmp(name, ":cntrl:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_CNTRL); } else if (strncmp(name, ":digit:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_DIGIT); } else if (strncmp(name, ":graph:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_GRAPH); } else if (strncmp(name, ":lower:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_LOWER); } else if (strncmp(name, ":print:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_PRINT); } else if (strncmp(name, ":punct:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_PUNCT); } else if (strncmp(name, ":space:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_SPACE); } else if (strncmp(name, ":upper:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_UPPER); } else if (strncmp(name, ":xdigit:", 8) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_XDIGIT); } else break; } /* here we got invalid charset name */ if (error_p) { name[i] = '\0'; Scm_Error("invalid or unsupported POSIX charset '[%s]'", name); } return SCM_FALSE; }
/*
 * Writes character C to port P, dispatching on the port type.
 *
 * For file ports the character is encoded directly into the port buffer;
 * the buffer is flushed beforehand if the encoded character does not fit,
 * and afterwards according to the port's buffering mode.  For an
 * unsupported port type a port error is raised.
 *
 * NOTE(review): the `#endif` after the signature closes a preprocessor
 * conditional that opens outside this view; presumably it selects
 * between a safe and an unsafe variant of this function - confirm.
 */
void Scm_PutcUnsafe(ScmChar c, ScmPort *p)
#endif
{
    VMDECL;
    SHORTCUT(p, Scm_PutcUnsafe(c, p); return);
    WALKER_CHECK(p);
    LOCK(p);
    CLOSE_CHECK(p);

    switch (SCM_PORT_TYPE(p)) {
    case SCM_PORT_FILE: {
        int nb = SCM_CHAR_NBYTES(c);
        /* Not enough room left for the encoded character: flush what has
           been buffered so far. */
        if (p->src.buf.current+nb > p->src.buf.end) {
            SAFE_CALL(p, bufport_flush(p, (int)(p->src.buf.current - p->src.buf.buffer), FALSE));
        }
        /* After the flush the character must fit. */
        SCM_ASSERT(p->src.buf.current+nb <= p->src.buf.end);
        SCM_CHAR_PUT(p->src.buf.current, c);
        p->src.buf.current += nb;
        if (SCM_PORT_BUFFER_MODE(p) == SCM_PORT_BUFFER_LINE) {
            /* Line buffering: flush only when a newline was written. */
            if (c == '\n') {
                SAFE_CALL(p, bufport_flush(p, nb, FALSE));
            }
        } else if (SCM_PORT_BUFFER_MODE(p) == SCM_PORT_BUFFER_NONE) {
            /* Unbuffered: flush after every character. */
            SAFE_CALL(p, bufport_flush(p, nb, FALSE));
        }
        UNLOCK(p);
        break;
    }
    case SCM_PORT_OSTR:
        SCM_DSTRING_PUTC(&p->src.ostr, c);
        UNLOCK(p);
        break;
    case SCM_PORT_PROC:
        SAFE_CALL(p, p->src.vt.Putc(c, p));
        UNLOCK(p);
        break;
    default:
        /* Release the lock before raising the error. */
        UNLOCK(p);
        Scm_PortError(p, SCM_PORT_ERROR_OUTPUT, "bad port type for output: %S", p);
    }
}
/*------------------------------------------------------------
 * Vport putc
 */

/*
 * Writes character C to the virtual port P.
 *
 * The put-char procedure is used when the port has one.  Otherwise the
 * character is encoded and each byte is handed to the put-byte procedure
 * in order.  If the port has neither procedure, a port error is raised.
 */
static void vport_putc(ScmChar c, ScmPort *p)
{
    unsigned char bytes[SCM_CHAR_MAX_BYTES];
    int nbytes, k;
    vport *data = (vport*)p->src.vt.data;
    SCM_ASSERT(data != NULL);

    /* Preferred path: the port can take a character directly. */
    if (!SCM_FALSEP(data->putc_proc)) {
        Scm_ApplyRec(data->putc_proc, SCM_LIST1(SCM_MAKE_CHAR(c)));
        return;
    }

    /* No way to output anything at all. */
    if (SCM_FALSEP(data->putb_proc)) {
        Scm_PortError(p, SCM_PORT_ERROR_OTHER,
                      "cannot perform output to the port %S", p);
        return;
    }

    /* Fall back to emitting the encoded character byte by byte. */
    nbytes = SCM_CHAR_NBYTES(c);
    SCM_CHAR_PUT(bytes, c);
    for (k = 0; k < nbytes; k++) {
        Scm_ApplyRec(data->putb_proc, SCM_LIST1(SCM_MAKE_INT(bytes[k])));
    }
}
/*
 * Internal function: writes the symbol name SNAM to PORT, wrapping it in
 * |...| with backslash / \x..; escapes when it contains characters that
 * need escaping.
 *
 * FLAGS may suppress the "||" output for an empty name
 * (SCM_SYMBOL_WRITER_NOESCAPE_EMPTY) and the check on the initial
 * character (SCM_SYMBOL_WRITER_NOESCAPE_INITIAL).
 *
 * TODO: For now, all bytes of 0x80 and above are regarded as "printable".
 * A more consistent mechanism is needed.
 */
void Scm_WriteSymbolName(ScmString *snam, ScmPort *port, ScmWriteContext *ctx, u_int flags)
{
    const ScmStringBody *body = SCM_STRING_BODY(snam);
    const char *start = SCM_STRING_BODY_START(body);
    int nbytes = SCM_STRING_BODY_SIZE(body);
    int need_bars = FALSE;
    /* Bits of special[] that force escaping for non-initial characters;
       case-folding write mode uses a wider mask. */
    int scanmask = 0x02;
    if (Scm_WriteContextCase(ctx) == SCM_WRITE_CASE_FOLD) {
        scanmask = 0x12;
    }

    /* Empty name. */
    if (nbytes == 0) {
        if (!(flags & SCM_SYMBOL_WRITER_NOESCAPE_EMPTY)) {
            SCM_PUTZ("||", -1, port);
        }
        return;
    }

    /* A lone '+' or '-' is written as is. */
    if (nbytes == 1 && (*start == '+' || *start == '-')) {
        SCM_PUTC((unsigned)*start, port);
        return;
    }

    /* Decide whether |-escaping is needed: first by the initial character,
       and failing that by scanning every byte of the name. */
    if ((unsigned int)*start < 128
        && (special[(unsigned int)*start]&1)
#if GAUCHE_UNIFY_SYMBOL_KEYWORD
        && (*start != ':')
#endif
        && (!(flags & SCM_SYMBOL_WRITER_NOESCAPE_INITIAL))) {
        need_bars = TRUE;
    } else {
        const char *limit = start + nbytes;
        for (const char *q = start; q < limit; q++) {
            if ((unsigned int)*q < 128
                && (special[(unsigned int)*q]&scanmask)) {
                need_bars = TRUE;
                break;
            }
        }
    }

    /* Plain name: no escaping required. */
    if (!need_bars) {
        SCM_PUTS(snam, port);
        return;
    }

    /* Escaped form, one character at a time. */
    SCM_PUTC('|', port);
    const char *q = start;
    while (q < start + nbytes) {
        unsigned int ch;
        SCM_CHAR_GET(q, ch);
        q += SCM_CHAR_NBYTES(ch);
        if (ch < 128 && (special[ch] & 8)) {
            /* backslash-escaped */
            SCM_PUTC('\\', port);
            SCM_PUTC(ch, port);
        } else if (ch < 128 && (special[ch] & 4)) {
            /* hex-escaped */
            Scm_Printf(port, "\\x%02x;", ch);
        } else {
            SCM_PUTC(ch, port);
        }
    }
    SCM_PUTC('|', port);
}
/*
 * Reads a character set literal from INPUT and returns it as a charset
 * object.
 *
 * The raw text of the literal is first collected into a dynamic string by
 * read_charset_syntax(), which also reports whether the set is
 * complemented; the collected text is then parsed here.
 *
 *   complement_p   - if non-NULL, receives the complement flag and the
 *                    returned set is left uncomplemented.  If NULL, the
 *                    set itself is complemented when the flag is set.
 *   error_p        - if true, a syntax error raises an error via
 *                    Scm_Error(); otherwise SCM_FALSE is returned.
 *   bracket_syntax - passed through to read_charset_syntax().
 *
 * Recognized inside the literal: ranges "a-z", backslash escapes
 * (\a \b \n \r \t \f \e, \x \u \U hex escapes), the class escapes
 * \d \D \s \S \w \W, and POSIX classes introduced by '['.
 */
ScmObj Scm_CharSetRead(ScmPort *input, int *complement_p, int error_p, int bracket_syntax)
{
    int complement = FALSE;
    ScmDString buf;

    Scm_DStringInit(&buf);
    if (read_charset_syntax(input, bracket_syntax, &buf, &complement)) {
        /* lastchar: the most recent ordinary character that may start a
                     range, or -1 if there is none.
           inrange:  TRUE after a '-' has been seen and the end of the
                     range is still pending. */
        int lastchar = -1, inrange = FALSE, moreset_complement = FALSE;
        ScmCharSet *set = SCM_CHAR_SET(Scm_MakeEmptyCharSet());
        int size;
        const char *cp = Scm_DStringPeek(&buf, &size, NULL);
        const char *end = cp + size;

        while (cp < end) {
            ScmChar ch;
            SCM_CHAR_GET(cp, ch);
            if (ch == SCM_CHAR_INVALID) goto err;
            cp += SCM_CHAR_NBYTES(ch);

            ScmObj moreset;
            switch (ch) {
            case '-':
                /* A second '-' while a range is pending is an ordinary
                   character (the end of the range). */
                if (inrange) goto ordchar;
                inrange = TRUE;
                continue;
            case '\\':
                /* A backslash must be followed by another character. */
                if (cp >= end) goto err;
                SCM_CHAR_GET(cp, ch);
                if (ch == SCM_CHAR_INVALID) goto err;
                cp += SCM_CHAR_NBYTES(ch);
                switch (ch) {
                case 'a': ch = 7; goto ordchar;
                case 'b': ch = 8; goto ordchar;
                case 'n': ch = '\n'; goto ordchar;
                case 'r': ch = '\r'; goto ordchar;
                case 't': ch = '\t'; goto ordchar;
                case 'f': ch = '\f'; goto ordchar;
                case 'e': ch = 0x1b; goto ordchar;
                case 'x': case 'u': case 'U':
                    /* Hex escape; cp is advanced past the digits. */
                    ch = Scm_ReadXdigitsFromString(cp, end-cp, ch, Scm_GetPortReaderLexicalMode(input), TRUE, &cp);
                    if (ch == SCM_CHAR_INVALID) goto err;
                    goto ordchar;
                case 'd':
                    moreset_complement = FALSE;
                    moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_DIGIT);
                    break;
                case 'D':
                    moreset_complement = TRUE;
                    moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_DIGIT);
                    break;
                case 's':
                    moreset_complement = FALSE;
                    moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_SPACE);
                    break;
                case 'S':
                    moreset_complement = TRUE;
                    moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_SPACE);
                    break;
                case 'w':
                    moreset_complement = FALSE;
                    moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_WORD);
                    break;
                case 'W':
                    moreset_complement = TRUE;
                    moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_WORD);
                    break;
                default:
                    /* Any other escaped character stands for itself. */
                    goto ordchar;
                }
                /* Class escape: the standard set is copied before being
                   complemented, then merged into the result. */
                if (moreset_complement) {
                    moreset = Scm_CharSetComplement(SCM_CHAR_SET(Scm_CharSetCopy(SCM_CHAR_SET(moreset))));
                }
                Scm_CharSetAdd(set, SCM_CHAR_SET(moreset));
                continue;
            case '[':
                /* POSIX class name; cp is advanced by the helper. */
                moreset = read_predef_charset(&cp, error_p);
                if (!SCM_CHAR_SET_P(moreset)) goto err;
                Scm_CharSetAdd(set, SCM_CHAR_SET(moreset));
                continue;
            ordchar:
            default:
                if (inrange) {
                    if (lastchar < 0) {
                        /* '-' with no range start: the '-' is literal,
                           and so is this character. */
                        Scm_CharSetAddRange(set, '-', '-');
                        Scm_CharSetAddRange(set, ch, ch);
                        lastchar = ch;
                    } else {
                        /* Complete the range lastchar-ch. */
                        Scm_CharSetAddRange(set, lastchar, ch);
                        lastchar = -1;
                    }
                    inrange = FALSE;
                } else {
                    Scm_CharSetAddRange(set, ch, ch);
                    lastchar = ch;
                }
                continue;
            }
            break;
        }
        /* A trailing '-' with no range end is a literal '-'. */
        if (inrange) {
            Scm_CharSetAddRange(set, '-', '-');
            if (lastchar >= 0) Scm_CharSetAddRange(set, lastchar, lastchar);
        }
        if (complement_p) {
            /* The caller handles complementing itself. */
            *complement_p = complement;
            return SCM_OBJ(set);
        } else {
            if (complement) Scm_CharSetComplement(set);
            return SCM_OBJ(set);
        }
    }
  err:
    /* Reached on any syntax error above, and also when
       read_charset_syntax() fails. */
    if (error_p) {
        /* TODO: We should deal with the case when input contains \0 */
        Scm_Error("Invalid charset syntax [%s%s...", complement? "^" : "", Scm_DStringPeek(&buf, NULL, NULL));
    }
    return SCM_FALSE;
}