STATIC mp_uint_t stringio_write(mp_obj_t o_in, const void *buf, mp_uint_t size, int *errcode) { (void)errcode; mp_obj_stringio_t *o = MP_OBJ_TO_PTR(o_in); check_stringio_is_open(o); if (o->vstr->fixed_buf) { stringio_copy_on_write(o); } mp_uint_t new_pos = o->pos + size; if (new_pos < size) { // Writing <size> bytes will overflow o->pos beyond limit of mp_uint_t. *errcode = MP_EFBIG; return MP_STREAM_ERROR; } mp_uint_t org_len = o->vstr->len; if (new_pos > o->vstr->alloc) { // Take all what's already allocated... o->vstr->len = o->vstr->alloc; // ... and add more vstr_add_len(o->vstr, new_pos - o->vstr->alloc); } // If there was a seek past EOF, clear the hole if (o->pos > org_len) { memset(o->vstr->buf + org_len, 0, o->pos - org_len); } memcpy(o->vstr->buf + o->pos, buf, size); o->pos = new_pos; if (new_pos > o->vstr->len) { o->vstr->len = new_pos; } return size; }
// Unbuffered, inefficient implementation of readline() for raw I/O files. STATIC mp_obj_t stream_unbuffered_readline(uint n_args, const mp_obj_t *args) { struct _mp_obj_base_t *o = (struct _mp_obj_base_t *)args[0]; if (o->type->stream_p == NULL || o->type->stream_p->read == NULL) { // CPython: io.UnsupportedOperation, OSError subclass nlr_raise(mp_obj_new_exception_msg(&mp_type_OSError, "Operation not supported")); } mp_int_t max_size = -1; if (n_args > 1) { max_size = MP_OBJ_SMALL_INT_VALUE(args[1]); } vstr_t *vstr; if (max_size != -1) { vstr = vstr_new_size(max_size); } else { vstr = vstr_new(); } int error; while (max_size == -1 || max_size-- != 0) { char *p = vstr_add_len(vstr, 1); if (p == NULL) { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_MemoryError, "out of memory")); } mp_int_t out_sz = o->type->stream_p->read(o, p, 1, &error); if (out_sz == -1) { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OSError, "[Errno %d]", error)); } if (out_sz == 0) { // Back out previously added byte // TODO: This is a bit hacky, does it supported by vstr API contract? // Consider, what's better - read a char and get OutOfMemory (so read // char is lost), or allocate first as we do. vstr_add_len(vstr, -1); break; } if (*p == '\n') { break; } } // TODO need a string creation API that doesn't copy the given data mp_obj_t ret = mp_obj_new_str_of_type(STREAM_CONTENT_TYPE(o->type->stream_p), (byte*)vstr->buf, vstr->len); vstr_free(vstr); return ret; }
// Unbuffered, inefficient implementation of readline() for raw I/O files. STATIC mp_obj_t stream_unbuffered_readline(size_t n_args, const mp_obj_t *args) { const mp_stream_p_t *stream_p = mp_get_stream_raise(args[0], MP_STREAM_OP_READ); mp_int_t max_size = -1; if (n_args > 1) { max_size = MP_OBJ_SMALL_INT_VALUE(args[1]); } vstr_t vstr; if (max_size != -1) { vstr_init(&vstr, max_size); } else { vstr_init(&vstr, 16); } while (max_size == -1 || max_size-- != 0) { char *p = vstr_add_len(&vstr, 1); if (p == NULL) { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_MemoryError, "out of memory")); } int error; mp_uint_t out_sz = stream_p->read(args[0], p, 1, &error); if (out_sz == MP_STREAM_ERROR) { if (mp_is_nonblocking_error(error)) { if (vstr.len == 1) { // We just incremented it, but otherwise we read nothing // and immediately got EAGAIN. This case is not well // specified in // https://docs.python.org/3/library/io.html#io.IOBase.readline // unlike similar case for read(). But we follow the latter's // behavior - return None. vstr_clear(&vstr); return mp_const_none; } else { goto done; } } mp_raise_OSError(error); } if (out_sz == 0) { done: // Back out previously added byte // Consider, what's better - read a char and get OutOfMemory (so read // char is lost), or allocate first as we do. vstr_cut_tail_bytes(&vstr, 1); break; } if (*p == '\n') { break; } } return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr); }
STATIC mp_uint_t stringio_write(mp_obj_t o_in, const void *buf, mp_uint_t size, int *errcode) { mp_obj_stringio_t *o = o_in; mp_uint_t remaining = o->vstr->alloc - o->pos; if (size > remaining) { // Take all what's already allocated... o->vstr->len = o->vstr->alloc; // ... and add more vstr_add_len(o->vstr, size - remaining); } memcpy(o->vstr->buf + o->pos, buf, size); o->pos += size; if (o->pos > o->vstr->len) { o->vstr->len = o->pos; } return size; }
STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte flags) { // What to do if sz < -1? Python docs don't specify this case. // CPython does a readall, but here we silently let negatives through, // and they will cause a MemoryError. mp_int_t sz; if (n_args == 1 || ((sz = mp_obj_get_int(args[1])) == -1)) { return stream_readall(args[0]); } const mp_stream_p_t *stream_p = mp_get_stream(args[0]); #if MICROPY_PY_BUILTINS_STR_UNICODE if (stream_p->is_text) { // We need to read sz number of unicode characters. Because we don't have any // buffering, and because the stream API can only read bytes, we must read here // in units of bytes and must never over read. If we want sz chars, then reading // sz bytes will never over-read, so we follow this approach, in a loop to keep // reading until we have exactly enough chars. This will be 1 read for text // with ASCII-only chars, and about 2 reads for text with a couple of non-ASCII // chars. For text with lots of non-ASCII chars, it'll be pretty inefficient // in time and memory. vstr_t vstr; vstr_init(&vstr, sz); mp_uint_t more_bytes = sz; mp_uint_t last_buf_offset = 0; while (more_bytes > 0) { char *p = vstr_add_len(&vstr, more_bytes); int error; mp_uint_t out_sz = mp_stream_read_exactly(args[0], p, more_bytes, &error); if (error != 0) { vstr_cut_tail_bytes(&vstr, more_bytes); if (mp_is_nonblocking_error(error)) { // With non-blocking streams, we read as much as we can. // If we read nothing, return None, just like read(). // Otherwise, return data read so far. // TODO what if we have read only half a non-ASCII char? if (vstr.len == 0) { vstr_clear(&vstr); return mp_const_none; } break; } mp_raise_OSError(error); } if (out_sz < more_bytes) { // Finish reading. // TODO what if we have read only half a non-ASCII char? vstr_cut_tail_bytes(&vstr, more_bytes - out_sz); if (out_sz == 0) { break; } } // count chars from bytes just read for (mp_uint_t off = last_buf_offset;;) { byte b = vstr.buf[off]; int n; if (!UTF8_IS_NONASCII(b)) { // 1-byte ASCII char n = 1; } else if ((b & 0xe0) == 0xc0) { // 2-byte char n = 2; } else if ((b & 0xf0) == 0xe0) { // 3-byte char n = 3; } else if ((b & 0xf8) == 0xf0) { // 4-byte char n = 4; } else { // TODO n = 5; } if (off + n <= vstr.len) { // got a whole char in n bytes off += n; sz -= 1; last_buf_offset = off; if (off >= vstr.len) { more_bytes = sz; break; } } else { // didn't get a whole char, so work out how many extra bytes are needed for // this partial char, plus bytes for additional chars that we want more_bytes = (off + n - vstr.len) + (sz - 1); break; } } } return mp_obj_new_str_from_vstr(&mp_type_str, &vstr); } #endif vstr_t vstr; vstr_init_len(&vstr, sz); int error; mp_uint_t out_sz = mp_stream_rw(args[0], vstr.buf, sz, &error, flags); if (error != 0) { vstr_clear(&vstr); if (mp_is_nonblocking_error(error)) { // https://docs.python.org/3.4/library/io.html#io.RawIOBase.read // "If the object is in non-blocking mode and no bytes are available, // None is returned." // This is actually very weird, as naive truth check will treat // this as EOF. return mp_const_none; } mp_raise_OSError(error); } else { vstr.len = out_sz; return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr); } }
// Unbuffered, inefficient implementation of readline() for raw I/O files. STATIC mp_obj_t stream_unbuffered_readline(uint n_args, const mp_obj_t *args) { struct _mp_obj_base_t *o = (struct _mp_obj_base_t *)args[0]; if (o->type->stream_p == NULL || o->type->stream_p->read == NULL) { // CPython: io.UnsupportedOperation, OSError subclass nlr_raise(mp_obj_new_exception_msg(&mp_type_OSError, "Operation not supported")); } mp_int_t max_size = -1; if (n_args > 1) { max_size = MP_OBJ_SMALL_INT_VALUE(args[1]); } vstr_t *vstr; if (max_size != -1) { vstr = vstr_new_size(max_size); } else { vstr = vstr_new(); } int error; while (max_size == -1 || max_size-- != 0) { char *p = vstr_add_len(vstr, 1); if (p == NULL) { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_MemoryError, "out of memory")); } mp_uint_t out_sz = o->type->stream_p->read(o, p, 1, &error); if (out_sz == MP_STREAM_ERROR) { if (is_nonblocking_error(error)) { if (vstr->len == 1) { // We just incremented it, but otherwise we read nothing // and immediately got EAGAIN. This is case is not well // specified in // https://docs.python.org/3/library/io.html#io.IOBase.readline // unlike similar case for read(). But we follow the latter's // behavior - return None. vstr_free(vstr); return mp_const_none; } else { goto done; } } nlr_raise(mp_obj_new_exception_arg1(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(error))); } if (out_sz == 0) { done: // Back out previously added byte // Consider, what's better - read a char and get OutOfMemory (so read // char is lost), or allocate first as we do. vstr_cut_tail_bytes(vstr, 1); break; } if (*p == '\n') { break; } } // TODO need a string creation API that doesn't copy the given data mp_obj_t ret = mp_obj_new_str_of_type(STREAM_CONTENT_TYPE(o->type->stream_p), (byte*)vstr->buf, vstr->len); vstr_free(vstr); return ret; }