/*------------------------------------------------------------ * Vport puts */ static void vport_puts(ScmString *s, ScmPort *p) { vport *data = (vport*)p->src.vt.data; const ScmStringBody *b = SCM_STRING_BODY(s); SCM_ASSERT(data != NULL); if (!SCM_FALSEP(data->puts_proc)) { Scm_ApplyRec(data->puts_proc, SCM_LIST1(SCM_OBJ(s))); } else if (SCM_STRING_BODY_INCOMPLETE_P(b) || (SCM_FALSEP(data->putc_proc) && !SCM_FALSEP(data->putb_proc))) { /* we perform binary output */ vport_putz(SCM_STRING_BODY_START(b), SCM_STRING_BODY_SIZE(b), p); } else if (!SCM_FALSEP(data->putc_proc)) { ScmChar c; int i; const char *cp = SCM_STRING_BODY_START(b); for (i=0; i < (int)SCM_STRING_BODY_LENGTH(b); i++) { SCM_CHAR_GET(cp, c); cp += SCM_CHAR_NFOLLOWS(*cp)+1; Scm_ApplyRec(data->putc_proc, SCM_LIST1(SCM_MAKE_CHAR(c))); } } else { Scm_PortError(p, SCM_PORT_ERROR_OTHER, "cannot perform output to the port %S", p); } }
static int getc_scratch_unsafe(ScmPort *p) #endif { char tbuf[SCM_CHAR_MAX_BYTES]; int nb = SCM_CHAR_NFOLLOWS(p->scratch[0]); int curr = p->scrcnt; memcpy(tbuf, p->scratch, curr); p->scrcnt = 0; for (int i=curr; i<=nb; i++) { int r = EOF; SAFE_CALL(p, r = Scm_Getb(p)); if (r == EOF) { UNLOCK(p); Scm_PortError(p, SCM_PORT_ERROR_INPUT, "encountered EOF in middle of a multibyte character from port %S", p); } tbuf[i] = (char)r; } int ch; SCM_CHAR_GET(tbuf, ch); if (ch == SCM_CHAR_INVALID) { /* This can happen if the input contains invalid byte sequence. We return the stray byte (which would eventually result an incomplete string when accumulated), while keeping the remaining bytes in the scrach buffer. */ ch = (ScmChar)(tbuf[0] & 0xff); memcpy(p->scratch, tbuf+1, nb); p->scrcnt = nb; } return ch; }
/*------------------------------------------------------------ * Vport Getc */ static int vport_getc(ScmPort *p) { vport *data = (vport*)p->src.vt.data; SCM_ASSERT(data != NULL); if (SCM_FALSEP(data->getc_proc)) { /* If the port doesn't have get-char method, try get-byte */ ScmObj b; int n, i; ScmChar ch; char buf[SCM_CHAR_MAX_BYTES]; if (SCM_FALSEP(data->getb_proc)) return EOF; b = Scm_ApplyRec(data->getb_proc, SCM_NIL); if (!SCM_INTP(b)) return EOF; buf[0] = (char)SCM_INT_VALUE(b); n = SCM_CHAR_NFOLLOWS(p->scratch[0]); for (i=0; i<n; i++) { b = Scm_ApplyRec(data->getb_proc, SCM_NIL); if (!SCM_INTP(b)) { /* TODO: should raise an exception? */ return EOF; } buf[i+1] = (char)SCM_INT_VALUE(b); } SCM_CHAR_GET(buf, ch); return ch; } else { ScmObj ch = Scm_ApplyRec(data->getc_proc, SCM_NIL); if (!SCM_CHARP(ch)) return EOF; return SCM_CHAR_VALUE(ch); } }
static ScmChar ucstochar(int ucs4) { #if defined(GAUCHE_CHAR_ENCODING_UTF_8) return (ScmChar)ucs4; #else /*!GAUCHE_CHAR_ENCODING_UTF_8*/ char inbuf[6], outbuf[6]; const char *inb = inbuf; char *outb = outbuf; if (ucsconv.ucs2char == NULL) return SCM_CHAR_INVALID; size_t inroom = UCS2UTF_NBYTES(ucs4); size_t outroom = 6; jconv_ucs4_to_utf8(ucs4, inbuf); (void)SCM_INTERNAL_MUTEX_LOCK(ucsconv.mutex); size_t r = jconv(ucsconv.ucs2char, &inb, &inroom, &outb, &outroom); (void)SCM_INTERNAL_MUTEX_UNLOCK(ucsconv.mutex); if (r == INPUT_NOT_ENOUGH || r == OUTPUT_NOT_ENOUGH) { Scm_Error("can't convert UCS4 code %d to a character: implementation problem?", ucs4); } if (r == ILLEGAL_SEQUENCE) { return SCM_CHAR_INVALID; } else { ScmChar out; SCM_CHAR_GET(outbuf, out); return out; } #endif /*!GAUCHE_CHAR_ENCODING_UTF_8*/ }
ScmObj read_predef_charset(const char **cp, int error_p) { int i; char name[MAX_CHARSET_NAME_LEN]; for (i=0; i<MAX_CHARSET_NAME_LEN; i++) { ScmChar ch; SCM_CHAR_GET(*cp, ch); if (ch == SCM_CHAR_INVALID) return SCM_FALSE; *cp += SCM_CHAR_NBYTES(ch); if (!SCM_CHAR_ASCII_P(ch)) break; if (ch != ']') { name[i] = (char)ch; continue; } if (strncmp(name, ":alnum:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_ALNUM); } else if (strncmp(name, ":alpha:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_ALPHA); } else if (strncmp(name, ":blank:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_BLANK); } else if (strncmp(name, ":cntrl:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_CNTRL); } else if (strncmp(name, ":digit:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_DIGIT); } else if (strncmp(name, ":graph:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_GRAPH); } else if (strncmp(name, ":lower:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_LOWER); } else if (strncmp(name, ":print:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_PRINT); } else if (strncmp(name, ":punct:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_PUNCT); } else if (strncmp(name, ":space:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_SPACE); } else if (strncmp(name, ":upper:", 7) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_UPPER); } else if (strncmp(name, ":xdigit:", 8) == 0) { return Scm_GetStandardCharSet(SCM_CHAR_SET_XDIGIT); } else break; } /* here we got invalid charset name */ if (error_p) { name[i] = '\0'; Scm_Error("invalid or unsupported POSIX charset '[%s]'", name); } return SCM_FALSE; }
int Scm_GetcUnsafe(ScmPort *p) #endif { VMDECL; SHORTCUT(p, return Scm_GetcUnsafe(p)); LOCK(p); CLOSE_CHECK(p); if (p->scrcnt > 0) { int r = GETC_SCRATCH(p); UNLOCK(p); return r; } if (p->ungotten != SCM_CHAR_INVALID) { int c = p->ungotten; p->ungotten = SCM_CHAR_INVALID; UNLOCK(p); return c; } switch (SCM_PORT_TYPE(p)) { case SCM_PORT_FILE: { int c = 0; if (p->src.buf.current >= p->src.buf.end) { int r = 0; SAFE_CALL(p, r = bufport_fill(p, 1, FALSE)); if (r == 0) { UNLOCK(p); return EOF; } } int first = (unsigned char)*p->src.buf.current++; int nb = SCM_CHAR_NFOLLOWS(first); p->bytes++; if (nb > 0) { if (p->src.buf.current + nb > p->src.buf.end) { /* The buffer doesn't have enough bytes to consist a char. move the incomplete char to the scratch buffer and try to fetch the rest of the char. */ int rest, filled = 0; p->scrcnt = (unsigned char)(p->src.buf.end - p->src.buf.current + 1); memcpy(p->scratch, p->src.buf.current-1, p->scrcnt); p->src.buf.current = p->src.buf.end; rest = nb + 1 - p->scrcnt; for (;;) { SAFE_CALL(p, filled = bufport_fill(p, rest, FALSE)); if (filled <= 0) { /* TODO: make this behavior customizable */ UNLOCK(p); Scm_PortError(p, SCM_PORT_ERROR_INPUT, "encountered EOF in middle of a multibyte character from port %S", p); } if (filled >= rest) { memcpy(p->scratch+p->scrcnt, p->src.buf.current, rest); p->scrcnt += rest; p->src.buf.current += rest; break; } else { memcpy(p->scratch+p->scrcnt, p->src.buf.current, filled); p->scrcnt += filled; p->src.buf.current = p->src.buf.end; rest -= filled; } } SCM_CHAR_GET(p->scratch, c); p->scrcnt = 0; } else { SCM_CHAR_GET(p->src.buf.current-1, c); p->src.buf.current += nb; } p->bytes += nb; } else { c = first; if (c == '\n') p->line++; } UNLOCK(p); return c; } case SCM_PORT_ISTR: { if (p->src.istr.current >= p->src.istr.end) { UNLOCK(p); return EOF; } int c = 0; int first = (unsigned char)*p->src.istr.current++; int nb = SCM_CHAR_NFOLLOWS(first); p->bytes++; if (nb > 0) { if (p->src.istr.current + nb > p->src.istr.end) { /* TODO: make this behavior customizable */ UNLOCK(p); Scm_PortError(p, SCM_PORT_ERROR_INPUT, "encountered EOF in middle of a multibyte character from port %S", p); } SCM_CHAR_GET(p->src.istr.current-1, c); p->src.istr.current += nb; p->bytes += nb; } else { c = first; if (c == '\n') p->line++; } UNLOCK(p); return c; } case SCM_PORT_PROC: { int c = 0; SAFE_CALL(p, c = p->src.vt.Getc(p)); if (c == '\n') p->line++; UNLOCK(p); return c; } default: UNLOCK(p); Scm_PortError(p, SCM_PORT_ERROR_INPUT, "bad port type for input: %S", p); } return 0;/*dummy*/ }
/* internal function to write symbol name, with proper escaping */ void Scm_WriteSymbolName(ScmString *snam, ScmPort *port, ScmWriteContext *ctx, u_int flags) { /* See if we have special characters, and use |-escape if necessary. */ /* TODO: For now, we regard chars over 0x80 is all "printable". Need a more consistent mechanism. */ const ScmStringBody *b = SCM_STRING_BODY(snam); const char *p = SCM_STRING_BODY_START(b); int siz = SCM_STRING_BODY_SIZE(b); int escape = FALSE; int spmask = (Scm_WriteContextCase(ctx) == SCM_WRITE_CASE_FOLD)? 0x12 : 0x02; if (siz == 0) { /* special case */ if (!(flags & SCM_SYMBOL_WRITER_NOESCAPE_EMPTY)) { SCM_PUTZ("||", -1, port); } return; } if (siz == 1 && (*p == '+' || *p == '-')) { SCM_PUTC((unsigned)*p, port); return; } if ((unsigned int)*p < 128 && (special[(unsigned int)*p]&1) #if GAUCHE_UNIFY_SYMBOL_KEYWORD && (*p != ':') #endif && (!(flags & SCM_SYMBOL_WRITER_NOESCAPE_INITIAL))) { escape = TRUE; } else { const char *q = p; for (int i=0; i<siz; i++, q++) { if ((unsigned int)*q < 128 && (special[(unsigned int)*q]&spmask)) { escape = TRUE; break; } } } if (escape) { SCM_PUTC('|', port); for (const char *q=p; q<p+siz; ) { unsigned int ch; SCM_CHAR_GET(q, ch); q += SCM_CHAR_NBYTES(ch); if (ch < 128) { if (special[ch] & 8) { SCM_PUTC('\\', port); SCM_PUTC(ch, port); } else if (special[ch] & 4) { Scm_Printf(port, "\\x%02x;", ch); } else { SCM_PUTC(ch, port); } } else { SCM_PUTC(ch, port); } } SCM_PUTC('|', port); return; } else { SCM_PUTS(snam, port); } }
ScmObj Scm_CharSetRead(ScmPort *input, int *complement_p, int error_p, int bracket_syntax) { int complement = FALSE; ScmDString buf; Scm_DStringInit(&buf); if (read_charset_syntax(input, bracket_syntax, &buf, &complement)) { int lastchar = -1, inrange = FALSE, moreset_complement = FALSE; ScmCharSet *set = SCM_CHAR_SET(Scm_MakeEmptyCharSet()); int size; const char *cp = Scm_DStringPeek(&buf, &size, NULL); const char *end = cp + size; while (cp < end) { ScmChar ch; SCM_CHAR_GET(cp, ch); if (ch == SCM_CHAR_INVALID) goto err; cp += SCM_CHAR_NBYTES(ch); ScmObj moreset; switch (ch) { case '-': if (inrange) goto ordchar; inrange = TRUE; continue; case '\\': if (cp >= end) goto err; SCM_CHAR_GET(cp, ch); if (ch == SCM_CHAR_INVALID) goto err; cp += SCM_CHAR_NBYTES(ch); switch (ch) { case 'a': ch = 7; goto ordchar; case 'b': ch = 8; goto ordchar; case 'n': ch = '\n'; goto ordchar; case 'r': ch = '\r'; goto ordchar; case 't': ch = '\t'; goto ordchar; case 'f': ch = '\f'; goto ordchar; case 'e': ch = 0x1b; goto ordchar; case 'x': case 'u': case 'U': ch = Scm_ReadXdigitsFromString(cp, end-cp, ch, Scm_GetPortReaderLexicalMode(input), TRUE, &cp); if (ch == SCM_CHAR_INVALID) goto err; goto ordchar; case 'd': moreset_complement = FALSE; moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_DIGIT); break; case 'D': moreset_complement = TRUE; moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_DIGIT); break; case 's': moreset_complement = FALSE; moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_SPACE); break; case 'S': moreset_complement = TRUE; moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_SPACE); break; case 'w': moreset_complement = FALSE; moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_WORD); break; case 'W': moreset_complement = TRUE; moreset = Scm_GetStandardCharSet(SCM_CHAR_SET_WORD); break; default: goto ordchar; } if (moreset_complement) { moreset = Scm_CharSetComplement(SCM_CHAR_SET(Scm_CharSetCopy(SCM_CHAR_SET(moreset)))); } Scm_CharSetAdd(set, SCM_CHAR_SET(moreset)); continue; case '[': moreset = read_predef_charset(&cp, error_p); if (!SCM_CHAR_SET_P(moreset)) goto err; Scm_CharSetAdd(set, SCM_CHAR_SET(moreset)); continue; ordchar: default: if (inrange) { if (lastchar < 0) { Scm_CharSetAddRange(set, '-', '-'); Scm_CharSetAddRange(set, ch, ch); lastchar = ch; } else { Scm_CharSetAddRange(set, lastchar, ch); lastchar = -1; } inrange = FALSE; } else { Scm_CharSetAddRange(set, ch, ch); lastchar = ch; } continue; } break; } if (inrange) { Scm_CharSetAddRange(set, '-', '-'); if (lastchar >= 0) Scm_CharSetAddRange(set, lastchar, lastchar); } if (complement_p) { *complement_p = complement; return SCM_OBJ(set); } else { if (complement) Scm_CharSetComplement(set); return SCM_OBJ(set); } } err: if (error_p) { /* TODO: We should deal with the case when input contains \0 */ Scm_Error("Invalid charset syntax [%s%s...", complement? "^" : "", Scm_DStringPeek(&buf, NULL, NULL)); } return SCM_FALSE; }