/*
 * Experimental variant: use wide characters
 * (we assume UTF-8 encoding)
 *
 * We assume that int is large enough to store any character.
 * To do this properly, we should use wint_t.
 *
 * Reads the next wide character from reader->input.stream, stores it in
 * reader->current, and updates reader->pos, reader->line and reader->column.
 *
 * Returns the character read, or EOF at end of input (or on a read error).
 * Once reader->current is EOF, every further call returns EOF without
 * touching the stream.
 */
static int file_reader_next_wchar(reader_t *reader) {
  wint_t c;
  int width;

  assert(reader->is_stream);

  if (reader->current == EOF) {
    return EOF;
  }

  if (reader->current == '\n') { // this should work in UTF-8?
    // The previous character ended a line: the character read below is
    // the first one of the next line, so the column restarts from 0.
    reader->line ++;
    reader->column = 0;
  }

#if defined(LINUX)
  c = fgetwc_unlocked(reader->input.stream);
#else
  c = fgetwc(reader->input.stream);
#endif

  if (c == WEOF) {
    // Report EOF explicitly rather than relying on the implementation-defined
    // conversion of WEOF (a wint_t) to int.
    reader->current = EOF;
    return EOF;
  }

  reader->current = c;
  reader->pos ++;

  // wcwidth() returns -1 for non-printable characters (tab, newline, other
  // controls). Count those as one column so the column never moves backwards.
  width = wcwidth(c);
  reader->column += (width < 0) ? 1 : width;

  return reader->current;
}
// TODO: return bool and use a param[out] for return value std::wstring StreamReader::read_cohort() { wchar_t wc; bool in_cohort = false; bool escape = false; size_t i = 0; //while ((wc = fgetwc(ins)) != WEOF) { while ((wc = fgetwc_unlocked(ins)) != WEOF) { if (i == arr_size) { arr = static_cast<wchar_t*>(realloc(arr, 2 * arr_size * sizeof(wchar_t))); arr_size *= 2; } if (!in_cohort) { // between cohorts: skip everything if (wc == L'^') { arr[i++] = wc; in_cohort = true; } } else { // in a cohort if (escape) { arr[i++] = wc; in_cohort = true; escape = false; } else if (wc == L'\\') { escape = true; } else if (wc == L'$') { arr[i++] = wc; in_cohort = true; return std::wstring(arr, i); } else { if (!iswspace(wc)) { // Might not be needed, or not everywhere arr[i++] = wc; } } } } // while /* WEOF -- what to do, what to do? */ return L""; }
/*
 * Wide-character read from standard input without stream locking:
 * forwards to fgetwc_unlocked() on stdin and returns its result unchanged
 * (the wide character read, or WEOF at end of input or on error).
 */
wint_t getwchar_unlocked(void)
{
	wint_t next = fgetwc_unlocked(stdin);
	return next;
}