/*
 * Index a string either by a single character position or by a range.
 *
 * - If `index` is an inclusive or exclusive range, the work is delegated to
 *   string_range_index().
 * - Otherwise `index` must be an Int (enforced through sl_expect). Negative
 *   positions are offset by the string's character length, so they count
 *   from the end.
 *
 * Returns vm->lib.nil when the position is still out of bounds after that
 * adjustment; otherwise returns a new string built from the bytes of the
 * selected character.
 */
SLVAL sl_string_char_at_index(sl_vm_t* vm, SLVAL self, SLVAL index)
{
    sl_string_t* string = sl_get_string(vm, self);

    if(sl_is_a(vm, index, vm->lib.Range_Inclusive) || sl_is_a(vm, index, vm->lib.Range_Exclusive)) {
        return string_range_index(vm, self, index);
    }

    long position = sl_get_int(sl_expect(vm, index, vm->lib.Int));
    if(position < 0) {
        position += string->char_len;
    }
    if(position < 0 || position >= (long)string->char_len) {
        return vm->lib.nil;
    }

    /* Step over `position` whole characters to reach the requested one. */
    uint8_t* cursor = string->buff;
    size_t remaining = string->buff_len;
    for(long skipped = 0; skipped < position; skipped++) {
        sl_utf8_each_char(vm, &cursor, &remaining);
    }

    /* The character spans its lead byte plus every following byte of the
     * form 10xxxxxx (UTF-8 continuation bytes). */
    size_t char_bytes = 1;
    for(; char_bytes < remaining; char_bytes++) {
        if((cursor[char_bytes] & 0xc0) != 0x80) {
            break;
        }
    }

    return sl_make_string(vm, cursor, char_bytes);
}
/*
 * Slice a string by a range of integer character positions.
 *
 * Both range bounds must be Ints, otherwise a TypeError is raised through
 * sl_throw_message2 (presumably non-returning -- confirm against its
 * definition). Negative bounds are offset by the string's character length.
 * For exclusive ranges the upper bound is decremented so the rest of the
 * function can treat the range as inclusive.
 *
 * Returns an empty string when the lower bound is out of range, when the
 * upper bound is still negative after adjustment, or when the range is
 * empty. An upper bound past the end of the string is clamped: the slice
 * simply runs to the last character.
 */
static SLVAL string_range_index(sl_vm_t* vm, SLVAL self, SLVAL range)
{
    sl_string_t* str = sl_get_string(vm, self);
    SLVAL lowerv = sl_range_lower(vm, range);
    SLVAL upperv = sl_range_upper(vm, range);

    if(!sl_is_a(vm, lowerv, vm->lib.Int) || !sl_is_a(vm, upperv, vm->lib.Int)) {
        sl_throw_message2(vm, vm->lib.TypeError, "Expected range of integers");
    }

    long lower = sl_get_int(lowerv);
    long upper = sl_get_int(upperv);

    if(lower < 0) {
        lower += str->char_len;
    }
    if(lower < 0 || (size_t)lower >= str->char_len) {
        return sl_make_cstring(vm, "");
    }

    if(upper < 0) {
        upper += str->char_len;
    }
    if(upper < 0) {
        return sl_make_cstring(vm, "");
    }

    if(sl_range_is_exclusive(vm, range)) {
        upper--;
    }
    if(upper < lower) {
        return sl_make_cstring(vm, "");
    }

    /* Walk to the first character of the slice. */
    uint8_t* begin_ptr = str->buff;
    size_t len = str->buff_len;
    long idx = 0;
    while(idx < lower && len) {
        idx++;
        sl_utf8_each_char(vm, &begin_ptr, &len);
    }

    /* Walk over the characters that belong to the slice. The `len` guard
     * stops the walk at the end of the buffer when `upper` exceeds the last
     * character index, instead of decoding past the string's bytes. */
    uint8_t* end_ptr = begin_ptr;
    while(lower <= upper && len) {
        lower++;
        sl_utf8_each_char(vm, &end_ptr, &len);
    }

    return sl_make_string(vm, begin_ptr, (size_t)(end_ptr - begin_ptr));
}
/*
 * Translate a character index into a byte offset from the start of the
 * string's buffer.
 *
 * Returns -1 when the buffer is exhausted before `index` reaches zero. That
 * covers an index equal to or greater than the number of characters, a
 * negative index, and any index into an empty string.
 */
int sl_string_byte_offset_for_index(sl_vm_t* vm, SLVAL strv, int index)
{
    sl_string_t* string = sl_get_string(vm, strv);
    uint8_t* cursor = string->buff;
    size_t remaining = string->buff_len;

    for(; remaining; index--) {
        if(index == 0) {
            return cursor - string->buff;
        }
        sl_utf8_each_char(vm, &cursor, &remaining);
    }

    return -1;
}
/*
 * Return a URL-encoded copy of the string.
 *
 * ASCII letters, digits and '-', '_', '.', '~' are copied through unchanged,
 * a space becomes '+', and every other character is re-encoded as UTF-8 with
 * each byte written as '%' followed by exactly two uppercase hex digits
 * (zero-padded, so 0x0A becomes "%0A").
 */
SLVAL sl_string_url_encode(sl_vm_t* vm, SLVAL self)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    sl_string_t* str = sl_get_string(vm, self);
    size_t out_cap = 32;
    size_t out_len = 0;
    uint8_t* out = sl_alloc_buffer(vm->arena, out_cap);
    size_t clen = str->buff_len;
    uint8_t* cbuff = str->buff;
    uint8_t utf8buff[8];

    /* Worst case for a single input character: every byte the scratch
     * buffer can hold expands to a three-byte "%XX" escape. */
    const size_t headroom = 3 * sizeof(utf8buff);

    while(clen) {
        /* Grow before decoding so the writes below never need a bounds
         * check. One doubling is always enough because the capacity starts
         * above `headroom`. */
        if(out_len + headroom >= out_cap) {
            out_cap *= 2;
            out = sl_realloc(vm->arena, out, out_cap);
        }

        uint32_t c = sl_utf8_each_char(vm, &cbuff, &clen);

        int unreserved = (c >= 'A' && c <= 'Z')
                      || (c >= 'a' && c <= 'z')
                      || (c >= '0' && c <= '9')
                      || c == '-' || c == '_' || c == '.' || c == '~';
        if(unreserved) {
            out[out_len++] = (uint8_t)c;
            continue;
        }
        if(c == ' ') {
            out[out_len++] = '+';
            continue;
        }

        uint32_t utf8len = sl_utf32_char_to_utf8(vm, c, utf8buff);
        for(uint32_t i = 0; i < utf8len; i++) {
            out[out_len++] = '%';
            out[out_len++] = (uint8_t)hex_digits[utf8buff[i] >> 4];
            out[out_len++] = (uint8_t)hex_digits[utf8buff[i] & 0x0f];
        }
    }

    return sl_make_string(vm, out, out_len);
}
/*
 * Translate a byte offset into the string's buffer into a character index.
 *
 * Characters are consumed from the start of the buffer until the offset has
 * been used up. An offset that falls inside a multi-byte character therefore
 * yields the index of the following character. Returns -1 when the offset
 * lies beyond the end of the buffer.
 */
int sl_string_index_for_byte_offset(sl_vm_t* vm, SLVAL strv, int byte_offset)
{
    sl_string_t* string = sl_get_string(vm, strv);
    uint8_t* cursor = string->buff;
    size_t remaining = string->buff_len;
    int chars_seen = 0;

    for(; remaining && byte_offset > 0; chars_seen++) {
        size_t before = remaining;
        sl_utf8_each_char(vm, &cursor, &remaining);
        /* Subtract however many bytes the decoder just consumed. */
        byte_offset -= before - remaining;
    }

    return byte_offset > 0 ? -1 : chars_seen;
}
/*
 * Find the first occurrence of `substr` inside `self`.
 *
 * Returns the match position as an Int counted in characters (not bytes),
 * or vm->lib.nil when there is no match. An empty needle matches at
 * position 0.
 */
SLVAL sl_string_index(sl_vm_t* vm, SLVAL self, SLVAL substr)
{
    sl_string_t* haystack = sl_get_string(vm, self);
    sl_string_t* needle = sl_get_string(vm, substr);
    uint8_t* cursor = haystack->buff;
    size_t remaining = haystack->buff_len;

    /* @TODO use a more efficient algorithm */
    /* Compare at each character boundary, advancing one whole character per
     * step so the running count stays a character index. */
    for(size_t char_index = 0; remaining >= needle->buff_len; char_index++) {
        if(memcmp(cursor, needle->buff, needle->buff_len) == 0) {
            return sl_make_int(vm, char_index);
        }
        sl_utf8_each_char(vm, &cursor, &remaining);
    }

    return vm->lib.nil;
}
/*
 * Return a new string with every character passed through
 * sl_unicode_tolower.
 *
 * The result object starts as a byte-wise copy of the source string's
 * header and is then given its own buffer. Because a mapped code point can
 * encode to a different number of UTF-8 bytes than the original, the output
 * buffer grows on demand and `buff_len` is set to the number of bytes
 * actually written rather than inherited from the source.
 */
SLVAL sl_string_lower(sl_vm_t* vm, SLVAL selfv)
{
    /* Room reserved for one encoded character before each write.
     * NOTE(review): assumes sl_utf32_char_to_utf8 never emits more than 8
     * bytes, matching the scratch buffer sl_string_url_encode passes to it
     * -- confirm against its definition. */
    enum { MAX_ENCODED_BYTES = 8 };

    sl_string_t* self = sl_get_string(vm, selfv);
    sl_string_t* retn = sl_get_string(vm, sl_allocate(vm, vm->lib.String));
    memcpy(retn, self, sizeof(sl_string_t));

    size_t out_cap = self->buff_len + MAX_ENCODED_BYTES;
    size_t out_offset = 0;
    retn->buff = sl_alloc_buffer(vm->arena, out_cap);

    size_t len = self->buff_len;
    uint8_t* buff = self->buff;

    while(len) {
        if(out_offset + MAX_ENCODED_BYTES > out_cap) {
            out_cap *= 2;
            retn->buff = sl_realloc(vm->arena, retn->buff, out_cap);
        }
        uint32_t c = sl_utf8_each_char(vm, &buff, &len);
        uint32_t lower_c = sl_unicode_tolower(c);
        out_offset += sl_utf32_char_to_utf8(vm, lower_c, retn->buff + out_offset);
    }

    /* Record the real encoded length; it equals the source length whenever
     * no mapping changed a character's byte width. */
    retn->buff_len = out_offset;

    return sl_make_ptr((sl_object_t*)retn);
}
/*
 * Split a string around a separator and return the pieces as an array.
 *
 * argv[0] is the separator string. argv[1], when present, must be an Int
 * giving the maximum number of pieces; zero or a negative value means
 * "no limit".
 *
 * - Empty separator: the string is split into individual characters. When
 *   a limit is set, the final piece holds the whole unsplit remainder.
 * - Non-empty separator: the string is scanned byte by byte for the
 *   separator. With a limit of 1 the result is a one-element array holding
 *   `self`. Otherwise, once limit - 1 pieces have been emitted the rest of
 *   the string becomes the last piece.
 */
SLVAL sl_string_split(sl_vm_t* vm, SLVAL self, size_t argc, SLVAL* argv)
{
    SLVAL separator = argv[0];
    sl_string_t* haystack = sl_get_string(vm, self);
    sl_string_t* needle = sl_get_string(vm, separator);
    SLVAL result = sl_make_array(vm, 0, NULL);
    SLVAL piece;

    uint8_t* cursor = haystack->buff;
    uint8_t* piece_start = cursor;
    size_t remaining = haystack->buff_len;

    long limit = 0;
    if(argc > 1) {
        limit = sl_get_int(sl_expect(vm, argv[1], vm->lib.Int));
        if(limit < 0) {
            limit = 0;
        }
    }

    long pieces = 0;

    if(needle->buff_len == 0) {
        /* Character-by-character split. */
        uint8_t encoded[12];
        while(remaining) {
            pieces++;
            if(limit && pieces == limit) {
                SLVAL rest = sl_make_string(vm, cursor, remaining);
                sl_array_push(vm, result, 1, &rest);
                break;
            }
            /* Each piece is the decoded character re-encoded as UTF-8. */
            uint32_t c = sl_utf8_each_char(vm, &cursor, &remaining);
            size_t encoded_len = sl_utf32_char_to_utf8(vm, c, encoded);
            piece = sl_make_string(vm, encoded, encoded_len);
            sl_array_push(vm, result, 1, &piece);
        }
        return result;
    }

    if(limit == 1) {
        return sl_make_array(vm, 1, &self);
    }

    while(remaining >= needle->buff_len) {
        if(memcmp(cursor, needle->buff, needle->buff_len) != 0) {
            /* No separator here; move on by one byte. */
            cursor++;
            remaining--;
            continue;
        }

        /* Emit everything since the previous separator, then skip it. */
        piece = sl_make_string(vm, piece_start, cursor - piece_start);
        sl_array_push(vm, result, 1, &piece);
        cursor += needle->buff_len;
        remaining -= needle->buff_len;
        pieces++;

        if(limit && pieces + 1 == limit) {
            SLVAL rest = sl_make_string(vm, cursor, remaining);
            sl_array_push(vm, result, 1, &rest);
            return result;
        }
        piece_start = cursor;
    }

    /* Trailing piece: from the last separator through the end of the buffer. */
    piece = sl_make_string(vm, piece_start, cursor - piece_start + remaining);
    sl_array_push(vm, result, 1, &piece);
    return result;
}