/* Search an environment file (pyvenv.cfg) for a "key = value" line.

   The file is rewound and read line by line.  A line is considered a match
   when its first whitespace-delimited token equals `key` and its second
   token is exactly "=".  The value is the rest of the line up to (but not
   including) the end-of-line characters.  Lines starting with '#' are
   skipped.  Scanning stops at the first line that does not end in '\n'
   (the line did not fit into the read buffer, or the final line has no
   trailing newline).

   env_file:   open stream; it is repositioned to the start of the file.
   key:        name to look for.
   value:      output buffer; receives the null-terminated value on success.
   value_size: capacity of `value` in wide characters, including the
               terminating null.  Longer values are truncated to fit.  If it
               is 0, nothing is written to `value`.

   Return 1 if the key was found, 0 otherwise. */
int
_Py_FindEnvConfigValue(FILE *env_file, const wchar_t *key,
                       wchar_t *value, size_t value_size)
{
    int result = 0;  /* meaning not found */
    char buffer[MAXPATHLEN*2+1];  /* allow extra for key, '=', etc. */

    fseek(env_file, 0, SEEK_SET);
    while (!feof(env_file)) {
        char *p = fgets(buffer, MAXPATHLEN*2, env_file);
        if (p == NULL) {
            break;
        }

        size_t n = strlen(p);
        /* n can be 0 if the line starts with an embedded NUL byte; check it
           first so that p[n - 1] never reads in front of the buffer. */
        if (n == 0 || p[n - 1] != '\n') {
            /* line has overflowed - bail */
            break;
        }
        if (p[0] == '#') {
            /* Comment - skip */
            continue;
        }

        wchar_t *tmpbuffer = _Py_DecodeUTF8_surrogateescape(buffer, n);
        if (tmpbuffer == NULL) {
            /* Line could not be decoded - try the next one */
            continue;
        }

        wchar_t *state;
        wchar_t *tok = wcstok(tmpbuffer, L" \t\r\n", &state);
        if (tok != NULL && wcscmp(tok, key) == 0) {
            tok = wcstok(NULL, L" \t", &state);
            if (tok != NULL && wcscmp(tok, L"=") == 0) {
                tok = wcstok(NULL, L"\r\n", &state);
                if (tok != NULL) {
                    /* Honour the caller's buffer size.  wcsncpy() does not
                       terminate the destination when the source is too
                       long, so always write the terminator explicitly. */
                    if (value_size > 0) {
                        wcsncpy(value, tok, value_size - 1);
                        value[value_size - 1] = L'\0';
                    }
                    result = 1;
                }
            }
        }
        PyMem_RawFree(tmpbuffer);

        if (result) {
            break;
        }
    }
    return result;
}
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler (undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Use _Py_wchar2char() to encode the character string back to a byte string.

   Return a pointer to a newly allocated wide character string (use
   PyMem_RawFree() to free the memory) and write the number of written wide
   characters excluding the null character into *size if size is not NULL, or
   NULL on error (decoding or memory allocation error). If size is not NULL,
   *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
   error.

   When __APPLE__ is defined, the input is decoded with
   _Py_DecodeUTF8_surrogateescape() instead of the locale conversion
   functions, and any failure is reported as (size_t)-1.

   Conversion errors should never happen, unless there is a bug in the C
   library. */
wchar_t*
_Py_char2wchar(const char* arg, size_t *size)
{
#ifdef __APPLE__
    wchar_t *wstr;
    wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
    if (size != NULL) {
        if (wstr != NULL)
            *size = wcslen(wstr);
        else
            *size = (size_t)-1;
    }
    return wstr;
#else
    wchar_t *res;
    size_t argsize;
    size_t count;
#ifdef HAVE_MBRTOWC
    /* Cursors used only by the mbrtowc() fallback further down. */
    unsigned char *in;
    wchar_t *out;
    mbstate_t mbs;
#endif

#ifndef MS_WINDOWS
    /* A value of -1 triggers a call to check_force_ascii(); the answer is
       stored back into force_ascii. */
    if (force_ascii == -1)
        force_ascii = check_force_ascii();

    if (force_ascii) {
        /* force ASCII encoding to workaround mbstowcs() issue */
        res = decode_ascii_surrogateescape(arg, size);
        if (res == NULL)
            goto oom;
        return res;
    }
#endif

    /* First attempt: convert the whole string with a single mbstowcs()
       call. */
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    argsize = strlen(arg);
#else
    argsize = mbstowcs(NULL, arg, 0);
#endif
    if (argsize != (size_t)-1) {
        /* Refuse sizes for which the increment or the multiplication by
           sizeof(wchar_t) below would exceed PY_SSIZE_T_MAX. */
        if (argsize == PY_SSIZE_T_MAX)
            goto oom;
        /* One more slot for the terminating null character. */
        argsize += 1;
        if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
            goto oom;
        res = (wchar_t *)PyMem_RawMalloc(argsize*sizeof(wchar_t));
        if (!res)
            goto oom;
        count = mbstowcs(res, arg, argsize);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
                ;
            if (*tmp == 0) {
                if (size != NULL)
                    *size = count;
                return res;
            }
        }
        /* Either the conversion failed or it produced a surrogate: discard
           this buffer and take the escaping path below. */
        PyMem_RawFree(res);
    }
    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
        goto oom;
    res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    /* From here on argsize counts the input bytes still to be consumed
       (including the terminating null byte). */
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0)
            /* Reached end of string; null char stored. */
            break;
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            PyMem_RawFree(res);
            if (size != NULL)
                *size = (size_t)-2;
            return NULL;
        }
        if (converted == (size_t)-1) {
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        if (Py_UNICODE_IS_SURROGATE(*out)) {
            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            /* One escaped wide character per consumed input byte; this
               overwrites the surrogate that mbrtowc() stored at *out. */
            while (converted--)
                *out++ = 0xdc00 + *in++;
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
    /* Number of wide characters written before the terminating null. */
    if (size != NULL)
        *size = out - res;
#else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape every non-ASCII byte).
       This will still roundtrip correctly in the locale's charset, which
       must be an ASCII superset. */
    res = decode_ascii_surrogateescape(arg, size);
    if (res == NULL)
        goto oom;
#endif   /* HAVE_MBRTOWC */
    return res;
oom:
    /* Shared exit for allocation failures and size overflows. */
    if (size != NULL)
        *size = (size_t)-1;
    return NULL;
#endif   /* __APPLE__ */
}
/* Compute the "lib-dynload" directory and store it in exec_prefix.

   search_for_exec_prefix requires that argv0_path be no more than
   MAXPATHLEN bytes long.

   Candidates are tried in this order:
     1. core_config->home (the part after DELIM, if DELIM is present);
     2. the directory named by "pybuilddir.txt" next to argv0_path;
     3. argv0_path and each of its parents;
     4. calculate->exec_prefix.

   Return 1 if a location was chosen (for case 1 without checking that it
   exists), -1 if the build directory from "pybuilddir.txt" is used, and 0
   if nothing was found. */
static int
search_for_exec_prefix(const _PyCoreConfig *core_config,
                       PyCalculatePath *calculate, wchar_t *exec_prefix)
{
    /* 1. If PYTHONHOME is set, we believe it unconditionally */
    if (core_config->home != NULL) {
        const wchar_t *sep = wcschr(core_config->home, DELIM);
        const wchar_t *home_root = (sep != NULL) ? sep + 1
                                                 : core_config->home;

        wcsncpy(exec_prefix, home_root, MAXPATHLEN);
        exec_prefix[MAXPATHLEN] = L'\0';
        joinpath(exec_prefix, calculate->lib_python);
        joinpath(exec_prefix, L"lib-dynload");
        return 1;
    }

    /* 2. Check to see if argv[0] is in the build directory. "pybuilddir.txt"
       is written by setup.py and contains the relative path to the location
       of shared library modules. */
    wcsncpy(exec_prefix, calculate->argv0_path, MAXPATHLEN);
    exec_prefix[MAXPATHLEN] = L'\0';
    joinpath(exec_prefix, L"pybuilddir.txt");
    if (isfile(exec_prefix)) {
        FILE *marker = _Py_wfopen(exec_prefix, L"rb");
        if (marker != NULL) {
            char raw[MAXPATHLEN+1];
            size_t nread = fread(raw, 1, MAXPATHLEN, marker);
            raw[nread] = '\0';
            fclose(marker);

            wchar_t *builddir = _Py_DecodeUTF8_surrogateescape(raw, nread);
            if (builddir != NULL) {
                wcsncpy(exec_prefix, calculate->argv0_path, MAXPATHLEN);
                exec_prefix[MAXPATHLEN] = L'\0';
                joinpath(exec_prefix, builddir);
                PyMem_RawFree(builddir);
                return -1;
            }
        }
        else {
            /* The file could not be opened: clear errno and keep looking */
            errno = 0;
        }
    }

    /* 3. Search from argv0_path, until root is found */
    copy_absolute(exec_prefix, calculate->argv0_path, MAXPATHLEN+1);
    for (;;) {
        /* Remember where the current directory ends so that the probe
           suffix can be cut off again. */
        size_t base_len = wcslen(exec_prefix);

        joinpath(exec_prefix, calculate->lib_python);
        joinpath(exec_prefix, L"lib-dynload");
        if (isdir(exec_prefix)) {
            return 1;
        }

        exec_prefix[base_len] = L'\0';
        reduce(exec_prefix);
        if (exec_prefix[0] == L'\0') {
            break;
        }
    }

    /* 4. Look at configure's EXEC_PREFIX */
    wcsncpy(exec_prefix, calculate->exec_prefix, MAXPATHLEN);
    exec_prefix[MAXPATHLEN] = L'\0';
    joinpath(exec_prefix, calculate->lib_python);
    joinpath(exec_prefix, L"lib-dynload");

    /* Found if the directory exists, otherwise fail */
    return isdir(exec_prefix) ? 1 : 0;
}