Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Python/fileutils.c
12 views
1
#include "Python.h"
2
#include "pycore_fileutils.h" // fileutils definitions
3
#include "pycore_runtime.h" // _PyRuntime
4
#include "osdefs.h" // SEP
5
#include <locale.h>
6
#include <stdlib.h> // mbstowcs()
7
8
#ifdef MS_WINDOWS
9
# include <malloc.h>
10
# include <windows.h>
11
# include <winioctl.h> // FILE_DEVICE_* constants
12
# include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION
13
# if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
14
# define PATHCCH_ALLOW_LONG_PATHS 0x01
15
# else
16
# include <pathcch.h> // PathCchCombineEx
17
# endif
18
extern int winerror_to_errno(int);
19
#endif
20
21
#ifdef HAVE_LANGINFO_H
22
#include <langinfo.h>
23
#endif
24
25
#ifdef HAVE_SYS_IOCTL_H
26
#include <sys/ioctl.h>
27
#endif
28
29
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
30
#include <iconv.h>
31
#endif
32
33
#ifdef HAVE_FCNTL_H
34
#include <fcntl.h>
35
#endif /* HAVE_FCNTL_H */
36
37
#ifdef O_CLOEXEC
38
/* Does open() support the O_CLOEXEC flag? Possible values:
39
40
-1: unknown
41
0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
42
1: open() supports O_CLOEXEC flag, close-on-exec is set
43
44
The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
45
and os.open(). */
46
int _Py_open_cloexec_works = -1;
47
#endif
48
49
// The value must be the same in unicodeobject.c.
50
#define MAX_UNICODE 0x10ffff
51
52
// mbstowcs() and mbrtowc() errors
53
static const size_t DECODE_ERROR = ((size_t)-1);
54
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
55
56
57
static int
58
get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
59
{
60
switch (errors)
61
{
62
case _Py_ERROR_STRICT:
63
*surrogateescape = 0;
64
return 0;
65
case _Py_ERROR_SURROGATEESCAPE:
66
*surrogateescape = 1;
67
return 0;
68
default:
69
return -1;
70
}
71
}
72
73
74
PyObject *
75
_Py_device_encoding(int fd)
76
{
77
int valid;
78
Py_BEGIN_ALLOW_THREADS
79
_Py_BEGIN_SUPPRESS_IPH
80
valid = isatty(fd);
81
_Py_END_SUPPRESS_IPH
82
Py_END_ALLOW_THREADS
83
if (!valid)
84
Py_RETURN_NONE;
85
86
#ifdef MS_WINDOWS
87
#ifdef HAVE_WINDOWS_CONSOLE_IO
88
UINT cp;
89
if (fd == 0)
90
cp = GetConsoleCP();
91
else if (fd == 1 || fd == 2)
92
cp = GetConsoleOutputCP();
93
else
94
cp = 0;
95
/* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
96
has no console */
97
if (cp == 0) {
98
Py_RETURN_NONE;
99
}
100
101
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
102
#else
103
Py_RETURN_NONE;
104
#endif /* HAVE_WINDOWS_CONSOLE_IO */
105
#else
106
if (_PyRuntime.preconfig.utf8_mode) {
107
_Py_DECLARE_STR(utf_8, "utf-8");
108
return Py_NewRef(&_Py_STR(utf_8));
109
}
110
return _Py_GetLocaleEncodingObject();
111
#endif
112
}
113
114
115
static int
116
is_valid_wide_char(wchar_t ch)
117
{
118
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
119
/* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
120
for non-Unicode locales, which makes values higher than MAX_UNICODE
121
possibly valid. */
122
return 1;
123
#endif
124
if (Py_UNICODE_IS_SURROGATE(ch)) {
125
// Reject lone surrogate characters
126
return 0;
127
}
128
if (ch > MAX_UNICODE) {
129
// bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
130
// The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
131
// it creates characters outside the [U+0000; U+10ffff] range:
132
// https://sourceware.org/bugzilla/show_bug.cgi?id=2373
133
return 0;
134
}
135
return 1;
136
}
137
138
139
static size_t
140
_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
141
{
142
size_t count = mbstowcs(dest, src, n);
143
if (dest != NULL && count != DECODE_ERROR) {
144
for (size_t i=0; i < count; i++) {
145
wchar_t ch = dest[i];
146
if (!is_valid_wide_char(ch)) {
147
return DECODE_ERROR;
148
}
149
}
150
}
151
return count;
152
}
153
154
155
#ifdef HAVE_MBRTOWC
156
static size_t
157
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
158
{
159
assert(pwc != NULL);
160
size_t count = mbrtowc(pwc, str, len, pmbs);
161
if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
162
if (!is_valid_wide_char(*pwc)) {
163
return DECODE_ERROR;
164
}
165
}
166
return count;
167
}
168
#endif
169
170
171
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
172
173
#define USE_FORCE_ASCII
174
175
extern int _Py_normalize_encoding(const char *, char *, size_t);
176
177
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
178
and POSIX locale. nl_langinfo(CODESET) announces an alias of the
179
ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
180
ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
181
locale.getpreferredencoding() codec. For example, if command line arguments
182
are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
183
UnicodeEncodeError instead of retrieving the original byte string.
184
185
The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
186
nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
187
one byte in range 0x80-0xff can be decoded from the locale encoding. The
188
workaround is also enabled on error, for example if getting the locale
189
failed.
190
191
On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
192
announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
193
ASCII encoding in this case.
194
195
Values of force_ascii:
196
197
1: the workaround is used: Py_EncodeLocale() uses
198
encode_ascii_surrogateescape() and Py_DecodeLocale() uses
199
decode_ascii()
200
0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
201
Py_DecodeLocale() uses mbstowcs()
202
-1: unknown, need to call check_force_ascii() to get the value
203
*/
204
#define force_ascii (_PyRuntime.fileutils.force_ascii)
205
206
/* Compute the value of force_ascii: decide whether the ASCII workaround
   must be used instead of mbstowcs() and wcstombs().

   Return 1 (force ASCII) if:

   - getting the LC_CTYPE locale or its codeset failed;
   - nl_langinfo(CODESET) is not available;
   - the LC_CTYPE locale is "C" or "POSIX", nl_langinfo(CODESET) announces
     ASCII (or one of its aliases), but at least one byte in range 0x80-0xff
     can be decoded by mbstowcs();
   - on HP-UX, the LC_CTYPE locale is "C" or "POSIX", the announced codeset
     is "roman8" but mbstowcs() decodes byte 0xA7 as Latin1.

   Return 0 otherwise.

   Every probe passed to _Py_mbstowcs() is NUL-terminated, so the decoder
   can never read past the end of the probe buffer, even if it treats the
   probed byte as the lead byte of a multibyte sequence. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* One byte followed by a NUL terminator */
        const char probe[2] = {(char)0xA7, '\0'};
        wchar_t wch;

        size_t res = _Py_mbstowcs(&wch, probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* One non-ASCII byte followed by a NUL terminator */
        char probe[2];
        wchar_t wch[1];

        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        size_t res = _Py_mbstowcs(wch, probe, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
305
306
307
int
308
_Py_GetForceASCII(void)
309
{
310
if (force_ascii == -1) {
311
force_ascii = check_force_ascii();
312
}
313
return force_ascii;
314
}
315
316
317
void
318
_Py_ResetForceASCII(void)
319
{
320
force_ascii = -1;
321
}
322
323
324
static int
325
encode_ascii(const wchar_t *text, char **str,
326
size_t *error_pos, const char **reason,
327
int raw_malloc, _Py_error_handler errors)
328
{
329
char *result = NULL, *out;
330
size_t len, i;
331
wchar_t ch;
332
333
int surrogateescape;
334
if (get_surrogateescape(errors, &surrogateescape) < 0) {
335
return -3;
336
}
337
338
len = wcslen(text);
339
340
/* +1 for NULL byte */
341
if (raw_malloc) {
342
result = PyMem_RawMalloc(len + 1);
343
}
344
else {
345
result = PyMem_Malloc(len + 1);
346
}
347
if (result == NULL) {
348
return -1;
349
}
350
351
out = result;
352
for (i=0; i<len; i++) {
353
ch = text[i];
354
355
if (ch <= 0x7f) {
356
/* ASCII character */
357
*out++ = (char)ch;
358
}
359
else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
360
/* UTF-8b surrogate */
361
*out++ = (char)(ch - 0xdc00);
362
}
363
else {
364
if (raw_malloc) {
365
PyMem_RawFree(result);
366
}
367
else {
368
PyMem_Free(result);
369
}
370
if (error_pos != NULL) {
371
*error_pos = i;
372
}
373
if (reason) {
374
*reason = "encoding error";
375
}
376
return -2;
377
}
378
}
379
*out = '\0';
380
*str = result;
381
return 0;
382
}
383
#else
384
/* The ASCII workaround is compiled out in this configuration:
   always report that it is not used. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
389
390
/* The ASCII workaround is compiled out in this configuration:
   there is no cached flag to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
395
#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
396
397
398
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
399
static int
400
decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
401
const char **reason, _Py_error_handler errors)
402
{
403
wchar_t *res;
404
unsigned char *in;
405
wchar_t *out;
406
size_t argsize = strlen(arg) + 1;
407
408
int surrogateescape;
409
if (get_surrogateescape(errors, &surrogateescape) < 0) {
410
return -3;
411
}
412
413
if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
414
return -1;
415
}
416
res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
417
if (!res) {
418
return -1;
419
}
420
421
out = res;
422
for (in = (unsigned char*)arg; *in; in++) {
423
unsigned char ch = *in;
424
if (ch < 128) {
425
*out++ = ch;
426
}
427
else {
428
if (!surrogateescape) {
429
PyMem_RawFree(res);
430
if (wlen) {
431
*wlen = in - (unsigned char*)arg;
432
}
433
if (reason) {
434
*reason = "decoding error";
435
}
436
return -2;
437
}
438
*out++ = 0xdc00 + ch;
439
}
440
}
441
*out = 0;
442
443
if (wlen != NULL) {
444
*wlen = out - res;
445
}
446
*wstr = res;
447
return 0;
448
}
449
#endif /* !HAVE_MBRTOWC */
450
451
static int
452
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
453
const char **reason, _Py_error_handler errors)
454
{
455
wchar_t *res;
456
size_t argsize;
457
size_t count;
458
#ifdef HAVE_MBRTOWC
459
unsigned char *in;
460
wchar_t *out;
461
mbstate_t mbs;
462
#endif
463
464
int surrogateescape;
465
if (get_surrogateescape(errors, &surrogateescape) < 0) {
466
return -3;
467
}
468
469
#ifdef HAVE_BROKEN_MBSTOWCS
470
/* Some platforms have a broken implementation of
471
* mbstowcs which does not count the characters that
472
* would result from conversion. Use an upper bound.
473
*/
474
argsize = strlen(arg);
475
#else
476
argsize = _Py_mbstowcs(NULL, arg, 0);
477
#endif
478
if (argsize != DECODE_ERROR) {
479
if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
480
return -1;
481
}
482
res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
483
if (!res) {
484
return -1;
485
}
486
487
count = _Py_mbstowcs(res, arg, argsize + 1);
488
if (count != DECODE_ERROR) {
489
*wstr = res;
490
if (wlen != NULL) {
491
*wlen = count;
492
}
493
return 0;
494
}
495
PyMem_RawFree(res);
496
}
497
498
/* Conversion failed. Fall back to escaping with surrogateescape. */
499
#ifdef HAVE_MBRTOWC
500
/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
501
502
/* Overallocate; as multi-byte characters are in the argument, the
503
actual output could use less memory. */
504
argsize = strlen(arg) + 1;
505
if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
506
return -1;
507
}
508
res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
509
if (!res) {
510
return -1;
511
}
512
513
in = (unsigned char*)arg;
514
out = res;
515
memset(&mbs, 0, sizeof mbs);
516
while (argsize) {
517
size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
518
if (converted == 0) {
519
/* Reached end of string; null char stored. */
520
break;
521
}
522
523
if (converted == INCOMPLETE_CHARACTER) {
524
/* Incomplete character. This should never happen,
525
since we provide everything that we have -
526
unless there is a bug in the C library, or I
527
misunderstood how mbrtowc works. */
528
goto decode_error;
529
}
530
531
if (converted == DECODE_ERROR) {
532
if (!surrogateescape) {
533
goto decode_error;
534
}
535
536
/* Decoding error. Escape as UTF-8b, and start over in the initial
537
shift state. */
538
*out++ = 0xdc00 + *in++;
539
argsize--;
540
memset(&mbs, 0, sizeof mbs);
541
continue;
542
}
543
544
// _Py_mbrtowc() reject lone surrogate characters
545
assert(!Py_UNICODE_IS_SURROGATE(*out));
546
547
/* successfully converted some bytes */
548
in += converted;
549
argsize -= converted;
550
out++;
551
}
552
if (wlen != NULL) {
553
*wlen = out - res;
554
}
555
*wstr = res;
556
return 0;
557
558
decode_error:
559
PyMem_RawFree(res);
560
if (wlen) {
561
*wlen = in - (unsigned char*)arg;
562
}
563
if (reason) {
564
*reason = "decoding error";
565
}
566
return -2;
567
#else /* HAVE_MBRTOWC */
568
/* Cannot use C locale for escaping; manually escape as if charset
569
is ASCII (i.e. escape all bytes > 128. This will still roundtrip
570
correctly in the locale's charset, which must be an ASCII superset. */
571
return decode_ascii(arg, wstr, wlen, reason, errors);
572
#endif /* HAVE_MBRTOWC */
573
}
574
575
576
/* Decode a byte string from the locale encoding.
577
578
Use the strict error handler if 'surrogateescape' is zero. Use the
579
surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
580
bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
581
can be decoded as a surrogate character, escape the bytes using the
582
surrogateescape error handler instead of decoding them.
583
584
On success, return 0 and write the newly allocated wide character string into
585
*wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
586
the number of wide characters excluding the null character into *wlen.
587
588
On memory allocation failure, return -1.
589
590
On decoding error, return -2. If wlen is not NULL, write the start of
591
invalid byte sequence in the input string into *wlen. If reason is not NULL,
592
write the decoding error message into *reason.
593
594
Return -3 if the error handler 'errors' is not supported.
595
596
Use the Py_EncodeLocaleEx() function to encode the character string back to
597
a byte string. */
598
int
599
_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
600
const char **reason,
601
int current_locale, _Py_error_handler errors)
602
{
603
if (current_locale) {
604
#ifdef _Py_FORCE_UTF8_LOCALE
605
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
606
errors);
607
#else
608
return decode_current_locale(arg, wstr, wlen, reason, errors);
609
#endif
610
}
611
612
#ifdef _Py_FORCE_UTF8_FS_ENCODING
613
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
614
errors);
615
#else
616
int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
617
#ifdef MS_WINDOWS
618
use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
619
#endif
620
if (use_utf8) {
621
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
622
errors);
623
}
624
625
#ifdef USE_FORCE_ASCII
626
if (force_ascii == -1) {
627
force_ascii = check_force_ascii();
628
}
629
630
if (force_ascii) {
631
/* force ASCII encoding to workaround mbstowcs() issue */
632
return decode_ascii(arg, wstr, wlen, reason, errors);
633
}
634
#endif
635
636
return decode_current_locale(arg, wstr, wlen, reason, errors);
637
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
638
}
639
640
641
/* Decode a byte string from the locale encoding with the
642
surrogateescape error handler: undecodable bytes are decoded as characters
643
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
644
character, escape the bytes using the surrogateescape error handler instead
645
of decoding them.
646
647
Return a pointer to a newly allocated wide character string, use
648
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
649
wide characters excluding the null character into *size
650
651
Return NULL on decoding error or memory allocation error. If *size* is not
652
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
653
decoding error.
654
655
Decoding errors should never happen, unless there is a bug in the C
656
library.
657
658
Use the Py_EncodeLocale() function to encode the character string back to a
659
byte string. */
660
wchar_t*
661
Py_DecodeLocale(const char* arg, size_t *wlen)
662
{
663
wchar_t *wstr;
664
int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
665
NULL, 0,
666
_Py_ERROR_SURROGATEESCAPE);
667
if (res != 0) {
668
assert(res != -3);
669
if (wlen != NULL) {
670
*wlen = (size_t)res;
671
}
672
return NULL;
673
}
674
return wstr;
675
}
676
677
678
static int
679
encode_current_locale(const wchar_t *text, char **str,
680
size_t *error_pos, const char **reason,
681
int raw_malloc, _Py_error_handler errors)
682
{
683
const size_t len = wcslen(text);
684
char *result = NULL, *bytes = NULL;
685
size_t i, size, converted;
686
wchar_t c, buf[2];
687
688
int surrogateescape;
689
if (get_surrogateescape(errors, &surrogateescape) < 0) {
690
return -3;
691
}
692
693
/* The function works in two steps:
694
1. compute the length of the output buffer in bytes (size)
695
2. outputs the bytes */
696
size = 0;
697
buf[1] = 0;
698
while (1) {
699
for (i=0; i < len; i++) {
700
c = text[i];
701
if (c >= 0xdc80 && c <= 0xdcff) {
702
if (!surrogateescape) {
703
goto encode_error;
704
}
705
/* UTF-8b surrogate */
706
if (bytes != NULL) {
707
*bytes++ = c - 0xdc00;
708
size--;
709
}
710
else {
711
size++;
712
}
713
continue;
714
}
715
else {
716
buf[0] = c;
717
if (bytes != NULL) {
718
converted = wcstombs(bytes, buf, size);
719
}
720
else {
721
converted = wcstombs(NULL, buf, 0);
722
}
723
if (converted == DECODE_ERROR) {
724
goto encode_error;
725
}
726
if (bytes != NULL) {
727
bytes += converted;
728
size -= converted;
729
}
730
else {
731
size += converted;
732
}
733
}
734
}
735
if (result != NULL) {
736
*bytes = '\0';
737
break;
738
}
739
740
size += 1; /* nul byte at the end */
741
if (raw_malloc) {
742
result = PyMem_RawMalloc(size);
743
}
744
else {
745
result = PyMem_Malloc(size);
746
}
747
if (result == NULL) {
748
return -1;
749
}
750
bytes = result;
751
}
752
*str = result;
753
return 0;
754
755
encode_error:
756
if (raw_malloc) {
757
PyMem_RawFree(result);
758
}
759
else {
760
PyMem_Free(result);
761
}
762
if (error_pos != NULL) {
763
*error_pos = i;
764
}
765
if (reason) {
766
*reason = "encoding error";
767
}
768
return -2;
769
}
770
771
772
/* Encode a string to the locale encoding.
773
774
Parameters:
775
776
* raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
777
of PyMem_Malloc().
778
* current_locale: if non-zero, use the current LC_CTYPE, otherwise use
779
Python filesystem encoding.
780
* errors: error handler like "strict" or "surrogateescape".
781
782
Return value:
783
784
0: success, *str is set to a newly allocated decoded string.
785
-1: memory allocation failure
786
-2: encoding error, set *error_pos and *reason (if set).
787
-3: the error handler 'errors' is not supported.
788
*/
789
static int
790
encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
791
const char **reason,
792
int raw_malloc, int current_locale, _Py_error_handler errors)
793
{
794
if (current_locale) {
795
#ifdef _Py_FORCE_UTF8_LOCALE
796
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
797
raw_malloc, errors);
798
#else
799
return encode_current_locale(text, str, error_pos, reason,
800
raw_malloc, errors);
801
#endif
802
}
803
804
#ifdef _Py_FORCE_UTF8_FS_ENCODING
805
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
806
raw_malloc, errors);
807
#else
808
int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
809
#ifdef MS_WINDOWS
810
use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
811
#endif
812
if (use_utf8) {
813
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
814
raw_malloc, errors);
815
}
816
817
#ifdef USE_FORCE_ASCII
818
if (force_ascii == -1) {
819
force_ascii = check_force_ascii();
820
}
821
822
if (force_ascii) {
823
return encode_ascii(text, str, error_pos, reason,
824
raw_malloc, errors);
825
}
826
#endif
827
828
return encode_current_locale(text, str, error_pos, reason,
829
raw_malloc, errors);
830
#endif /* _Py_FORCE_UTF8_FS_ENCODING */
831
}
832
833
static char*
834
encode_locale(const wchar_t *text, size_t *error_pos,
835
int raw_malloc, int current_locale)
836
{
837
char *str;
838
int res = encode_locale_ex(text, &str, error_pos, NULL,
839
raw_malloc, current_locale,
840
_Py_ERROR_SURROGATEESCAPE);
841
if (res != -2 && error_pos) {
842
*error_pos = (size_t)-1;
843
}
844
if (res != 0) {
845
return NULL;
846
}
847
return str;
848
}
849
850
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory.  Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
867
868
869
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
   instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
876
877
878
int
879
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
880
size_t *error_pos, const char **reason,
881
int current_locale, _Py_error_handler errors)
882
{
883
return encode_locale_ex(text, str, error_pos, reason, 1,
884
current_locale, errors);
885
}
886
887
888
// Get the current locale encoding name:
889
//
890
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
891
// - Return "utf-8" if the UTF-8 Mode is enabled
892
// - On Windows, return the ANSI code page (ex: "cp1250")
893
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
894
// - Otherwise, return nl_langinfo(CODESET).
895
//
896
// Return NULL on memory allocation failure.
897
//
898
// See also config_get_locale_encoding()
899
wchar_t*
900
_Py_GetLocaleEncoding(void)
901
{
902
#ifdef _Py_FORCE_UTF8_LOCALE
903
// On Android langinfo.h and CODESET are missing,
904
// and UTF-8 is always used in mbstowcs() and wcstombs().
905
return _PyMem_RawWcsdup(L"utf-8");
906
#else
907
908
#ifdef MS_WINDOWS
909
wchar_t encoding[23];
910
unsigned int ansi_codepage = GetACP();
911
swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
912
encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
913
return _PyMem_RawWcsdup(encoding);
914
#else
915
const char *encoding = nl_langinfo(CODESET);
916
if (!encoding || encoding[0] == '\0') {
917
// Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
918
// macOS if the LC_CTYPE locale is not supported.
919
return _PyMem_RawWcsdup(L"utf-8");
920
}
921
922
wchar_t *wstr;
923
int res = decode_current_locale(encoding, &wstr, NULL,
924
NULL, _Py_ERROR_SURROGATEESCAPE);
925
if (res < 0) {
926
return NULL;
927
}
928
return wstr;
929
#endif // !MS_WINDOWS
930
931
#endif // !_Py_FORCE_UTF8_LOCALE
932
}
933
934
935
/* Same as _Py_GetLocaleEncoding(), but return the encoding name as a
   Python str object.

   Return NULL with an exception set on error: MemoryError if
   _Py_GetLocaleEncoding() failed, otherwise whatever
   PyUnicode_FromWideChar() raised. */
PyObject *
_Py_GetLocaleEncodingObject(void)
{
    wchar_t *name = _Py_GetLocaleEncoding();
    if (name == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* -1: the string is null-terminated */
    PyObject *result = PyUnicode_FromWideChar(name, -1);
    PyMem_RawFree(name);
    return result;
}
948
949
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
950
951
/* Check whether current locale uses Unicode as internal wchar_t form.

   Return 0 if nl_langinfo(CODESET) is NULL, "UTF-8" or "646", and 1 for
   any other codeset. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL) {
        return 0;
    }
    if (strcmp(codeset, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(codeset, "646") == 0) {
        return 0;
    }
    return 1;
}
964
965
static wchar_t *
966
_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
967
const char *tocode, const char *fromcode)
968
{
969
static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
970
971
/* Ensure we won't overflow the size. */
972
if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
973
PyErr_NoMemory();
974
return NULL;
975
}
976
977
/* the string doesn't have to be NULL terminated */
978
wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
979
if (target == NULL) {
980
PyErr_NoMemory();
981
return NULL;
982
}
983
984
iconv_t cd = iconv_open(tocode, fromcode);
985
if (cd == (iconv_t)-1) {
986
PyErr_Format(PyExc_ValueError, "iconv_open() failed");
987
PyMem_Free(target);
988
return NULL;
989
}
990
991
char *inbuf = (char *) source;
992
char *outbuf = (char *) target;
993
size_t inbytesleft = sizeof(wchar_t) * size;
994
size_t outbytesleft = inbytesleft;
995
996
size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
997
if (ret == DECODE_ERROR) {
998
PyErr_Format(PyExc_ValueError, "iconv() failed");
999
PyMem_Free(target);
1000
iconv_close(cd);
1001
return NULL;
1002
}
1003
1004
iconv_close(cd);
1005
return target;
1006
}
1007
1008
/* Convert a wide character string to the UCS-4 encoded string. This
1009
is necessary on systems where internal form of wchar_t are not Unicode
1010
code points (e.g. Oracle Solaris).
1011
1012
Return a pointer to a newly allocated string, use PyMem_Free() to free
1013
the memory. Return NULL and raise exception on conversion or memory
1014
allocation error. */
1015
wchar_t *
1016
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1017
{
1018
return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1019
}
1020
1021
/* Convert a UCS-4 encoded string to native wide character string. This
1022
is necessary on systems where internal form of wchar_t are not Unicode
1023
code points (e.g. Oracle Solaris).
1024
1025
The conversion is done in place. This can be done because both wchar_t
1026
and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1027
to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1028
which is currently the only system using these functions; it doesn't have
1029
to be for other systems).
1030
1031
Return 0 on success. Return -1 and raise exception on conversion
1032
or memory allocation error. */
1033
int
1034
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1035
{
1036
wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1037
if (!result) {
1038
return -1;
1039
}
1040
memcpy(unicode, result, size * sizeof(wchar_t));
1041
PyMem_Free(result);
1042
return 0;
1043
}
1044
#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1045
1046
#ifdef MS_WINDOWS
1047
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1048
1049
static void
1050
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1051
{
1052
/* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1053
/* Cannot simply cast and dereference in_ptr,
1054
since it might not be aligned properly */
1055
__int64 in;
1056
memcpy(&in, in_ptr, sizeof(in));
1057
*nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1058
*time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1059
}
1060
1061
static void
1062
LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out)
1063
{
1064
*nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1065
*time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t);
1066
}
1067
1068
void
1069
_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1070
{
1071
/* XXX endianness */
1072
__int64 out;
1073
out = time_in + secs_between_epochs;
1074
out = out * 10000000 + nsec_in / 100;
1075
memcpy(out_ptr, &out, sizeof(out));
1076
}
1077
1078
/* Below, we *know* that ugo+r is 0444 */
1079
#if _S_IREAD != 0400
1080
#error Unsupported C library
1081
#endif
1082
static int
1083
attributes_to_mode(DWORD attr)
1084
{
1085
int m = 0;
1086
if (attr & FILE_ATTRIBUTE_DIRECTORY)
1087
m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1088
else
1089
m |= _S_IFREG;
1090
if (attr & FILE_ATTRIBUTE_READONLY)
1091
m |= 0444;
1092
else
1093
m |= 0666;
1094
return m;
1095
}
1096
1097
1098
/* Overlay used to read a 128-bit FILE_ID_128 as two 64-bit integers:
   store the identifier into 'id', then read 'st_ino' and 'st_ino_high'. */
typedef union {
    FILE_ID_128 id;
    struct {
        uint64_t st_ino;        /* first 8 bytes of the identifier */
        uint64_t st_ino_high;   /* last 8 bytes of the identifier */
    };
} id_128_to_ino;
1105
1106
1107
void
1108
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1109
FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info,
1110
struct _Py_stat_struct *result)
1111
{
1112
memset(result, 0, sizeof(*result));
1113
result->st_mode = attributes_to_mode(info->dwFileAttributes);
1114
result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1115
result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber;
1116
result->st_rdev = 0;
1117
/* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */
1118
if (basic_info) {
1119
LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1120
LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1121
LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1122
LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1123
} else {
1124
FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1125
FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1126
FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1127
}
1128
result->st_nlink = info->nNumberOfLinks;
1129
1130
if (id_info) {
1131
id_128_to_ino file_id;
1132
file_id.id = id_info->FileId;
1133
result->st_ino = file_id.st_ino;
1134
result->st_ino_high = file_id.st_ino_high;
1135
}
1136
if (!result->st_ino && !result->st_ino_high) {
1137
/* should only occur for DirEntry_from_find_data, in which case the
1138
index is likely to be zero anyway. */
1139
result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1140
}
1141
1142
/* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1143
open other name surrogate reparse points without traversing them. To
1144
detect/handle these, check st_file_attributes and st_reparse_tag. */
1145
result->st_reparse_tag = reparse_tag;
1146
if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1147
reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1148
/* set the bits that make this a symlink */
1149
result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1150
}
1151
result->st_file_attributes = info->dwFileAttributes;
1152
}
1153
1154
void
1155
_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info,
1156
struct _Py_stat_struct *result)
1157
{
1158
memset(result, 0, sizeof(*result));
1159
result->st_mode = attributes_to_mode(info->FileAttributes);
1160
result->st_size = info->EndOfFile.QuadPart;
1161
LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1162
LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1163
LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1164
LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1165
result->st_nlink = info->NumberOfLinks;
1166
result->st_dev = info->VolumeSerialNumber.QuadPart;
1167
/* File systems with less than 128-bits zero pad into this field */
1168
id_128_to_ino file_id;
1169
file_id.id = info->FileId128;
1170
result->st_ino = file_id.st_ino;
1171
result->st_ino_high = file_id.st_ino_high;
1172
/* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1173
open other name surrogate reparse points without traversing them. To
1174
detect/handle these, check st_file_attributes and st_reparse_tag. */
1175
result->st_reparse_tag = info->ReparseTag;
1176
if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1177
info->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1178
/* set the bits that make this a symlink */
1179
result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1180
}
1181
result->st_file_attributes = info->FileAttributes;
1182
switch (info->DeviceType) {
1183
case FILE_DEVICE_DISK:
1184
case FILE_DEVICE_VIRTUAL_DISK:
1185
case FILE_DEVICE_DFS:
1186
case FILE_DEVICE_CD_ROM:
1187
case FILE_DEVICE_CONTROLLER:
1188
case FILE_DEVICE_DATALINK:
1189
break;
1190
case FILE_DEVICE_DISK_FILE_SYSTEM:
1191
case FILE_DEVICE_CD_ROM_FILE_SYSTEM:
1192
case FILE_DEVICE_NETWORK_FILE_SYSTEM:
1193
result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */
1194
break;
1195
case FILE_DEVICE_CONSOLE:
1196
case FILE_DEVICE_NULL:
1197
case FILE_DEVICE_KEYBOARD:
1198
case FILE_DEVICE_MODEM:
1199
case FILE_DEVICE_MOUSE:
1200
case FILE_DEVICE_PARALLEL_PORT:
1201
case FILE_DEVICE_PRINTER:
1202
case FILE_DEVICE_SCREEN:
1203
case FILE_DEVICE_SERIAL_PORT:
1204
case FILE_DEVICE_SOUND:
1205
result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR;
1206
break;
1207
case FILE_DEVICE_NAMED_PIPE:
1208
result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO;
1209
break;
1210
default:
1211
if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
1212
result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR;
1213
}
1214
break;
1215
}
1216
}
1217
1218
#endif
1219
1220
/* Fill 'status' with information about the file open on descriptor 'fd',
   without raising a Python exception.

   On POSIX, this is a plain fstat() call.

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   because they support files larger than 2 GiB. fstat() may fail with
   EOVERFLOW on such files because the file size type is a signed 32-bit
   integer: see issue #23152.

   Return 0 on success. On error, return nonzero: on Windows the last
   Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION handle_info;
    FILE_BASIC_INFO basic_info;
    FILE_ID_INFO id_info;

    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Only disk files carry the detailed information queried below;
       for every other type just report the mode bits that are known. */
    if (file_type == FILE_TYPE_CHAR) {
        status->st_mode = _S_IFCHR;
        return 0;
    }
    if (file_type == FILE_TYPE_PIPE) {
        status->st_mode = _S_IFIFO;
        return 0;
    }
    if (file_type != FILE_TYPE_DISK) {
        return 0;
    }

    int queried =
        GetFileInformationByHandle(handle, &handle_info)
        && GetFileInformationByHandleEx(handle, FileBasicInfo,
                                        &basic_info, sizeof(basic_info))
        && GetFileInformationByHandleEx(handle, FileIdInfo,
                                        &id_info, sizeof(id_info));
    if (!queried) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&handle_info, 0, &basic_info, &id_info,
                               status);
    return 0;
#else
    return fstat(fd, status);
#endif
}
1285
1286
/* Return information about a file.
1287
1288
On POSIX, use fstat().
1289
1290
On Windows, use GetFileType() and GetFileInformationByHandle() which support
1291
files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1292
than 2 GiB because the file size type is a signed 32-bit integer: see issue
1293
#23152.
1294
1295
Raise an exception and return -1 on error. On Windows, set the last Windows
1296
error on error. On POSIX, set errno on error. Fill status and return 0 on
1297
success.
1298
1299
Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1300
to call fstat(). The caller must hold the GIL. */
1301
int
1302
_Py_fstat(int fd, struct _Py_stat_struct *status)
1303
{
1304
int res;
1305
1306
assert(PyGILState_Check());
1307
1308
Py_BEGIN_ALLOW_THREADS
1309
res = _Py_fstat_noraise(fd, status);
1310
Py_END_ALLOW_THREADS
1311
1312
if (res != 0) {
1313
#ifdef MS_WINDOWS
1314
PyErr_SetFromWindowsErr(0);
1315
#else
1316
PyErr_SetFromErrno(PyExc_OSError);
1317
#endif
1318
return -1;
1319
}
1320
return 0;
1321
}
1322
1323
/* Like _Py_stat() but with a raw wide-character filename.

   On Windows, call _wstat() and copy only st_mode into 'buf' on success.
   Elsewhere, encode 'path' with _Py_EncodeLocaleRaw() and call stat(); if
   the encoding fails, set errno to EINVAL and return -1.

   Return the result of _wstat() / stat() otherwise. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat win_stat;
    int rc = _wstat(path, &win_stat);
    if (rc == 0) {
        buf->st_mode = win_stat.st_mode;
    }
    return rc;
#else
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    int rc = stat(encoded, buf);
    PyMem_RawFree(encoded);
    return rc;
#endif
}
1346
1347
1348
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1349
call stat() otherwise. Only fill st_mode attribute on Windows.
1350
1351
Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1352
raised. */
1353
1354
int
1355
_Py_stat(PyObject *path, struct stat *statbuf)
1356
{
1357
#ifdef MS_WINDOWS
1358
int err;
1359
1360
wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1361
if (wpath == NULL)
1362
return -2;
1363
1364
err = _Py_wstat(wpath, statbuf);
1365
PyMem_Free(wpath);
1366
return err;
1367
#else
1368
int ret;
1369
PyObject *bytes;
1370
char *cpath;
1371
1372
bytes = PyUnicode_EncodeFSDefault(path);
1373
if (bytes == NULL)
1374
return -2;
1375
1376
/* check for embedded null bytes */
1377
if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1378
Py_DECREF(bytes);
1379
return -2;
1380
}
1381
1382
ret = stat(cpath, statbuf);
1383
Py_DECREF(bytes);
1384
return ret;
1385
#endif
1386
}
1387
1388
// Some Windows API partitions declare SetHandleInformation() without
// defining any of the handle flags: supply the inherit flag in that case.
#if defined(MS_WINDOWS) && !defined(HANDLE_FLAG_INHERIT)
#  define HANDLE_FLAG_INHERIT 0x00000001
#endif
1395
1396
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1397
static int
1398
get_inheritable(int fd, int raise)
1399
{
1400
#ifdef MS_WINDOWS
1401
HANDLE handle;
1402
DWORD flags;
1403
1404
handle = _Py_get_osfhandle_noraise(fd);
1405
if (handle == INVALID_HANDLE_VALUE) {
1406
if (raise)
1407
PyErr_SetFromErrno(PyExc_OSError);
1408
return -1;
1409
}
1410
1411
if (!GetHandleInformation(handle, &flags)) {
1412
if (raise)
1413
PyErr_SetFromWindowsErr(0);
1414
return -1;
1415
}
1416
1417
return (flags & HANDLE_FLAG_INHERIT);
1418
#else
1419
int flags;
1420
1421
flags = fcntl(fd, F_GETFD, 0);
1422
if (flags == -1) {
1423
if (raise)
1424
PyErr_SetFromErrno(PyExc_OSError);
1425
return -1;
1426
}
1427
return !(flags & FD_CLOEXEC);
1428
#endif
1429
}
1430
1431
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    /* raise=1: failures are reported as Python exceptions. */
    const int raise = 1;
    return get_inheritable(fd, raise);
}
1439
1440
1441
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1442
static int
1443
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1444
{
1445
#ifdef MS_WINDOWS
1446
HANDLE handle;
1447
DWORD flags;
1448
#else
1449
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1450
static int ioctl_works = -1;
1451
int request;
1452
int err;
1453
#endif
1454
int flags, new_flags;
1455
int res;
1456
#endif
1457
1458
/* atomic_flag_works can only be used to make the file descriptor
1459
non-inheritable */
1460
assert(!(atomic_flag_works != NULL && inheritable));
1461
1462
if (atomic_flag_works != NULL && !inheritable) {
1463
if (*atomic_flag_works == -1) {
1464
int isInheritable = get_inheritable(fd, raise);
1465
if (isInheritable == -1)
1466
return -1;
1467
*atomic_flag_works = !isInheritable;
1468
}
1469
1470
if (*atomic_flag_works)
1471
return 0;
1472
}
1473
1474
#ifdef MS_WINDOWS
1475
handle = _Py_get_osfhandle_noraise(fd);
1476
if (handle == INVALID_HANDLE_VALUE) {
1477
if (raise)
1478
PyErr_SetFromErrno(PyExc_OSError);
1479
return -1;
1480
}
1481
1482
if (inheritable)
1483
flags = HANDLE_FLAG_INHERIT;
1484
else
1485
flags = 0;
1486
1487
if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1488
if (raise)
1489
PyErr_SetFromWindowsErr(0);
1490
return -1;
1491
}
1492
return 0;
1493
1494
#else
1495
1496
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1497
if (ioctl_works != 0 && raise != 0) {
1498
/* fast-path: ioctl() only requires one syscall */
1499
/* caveat: raise=0 is an indicator that we must be async-signal-safe
1500
* thus avoid using ioctl() so we skip the fast-path. */
1501
if (inheritable)
1502
request = FIONCLEX;
1503
else
1504
request = FIOCLEX;
1505
err = ioctl(fd, request, NULL);
1506
if (!err) {
1507
ioctl_works = 1;
1508
return 0;
1509
}
1510
1511
#ifdef O_PATH
1512
if (errno == EBADF) {
1513
// bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1514
// on O_PATH file descriptors. Fall through to the fcntl()
1515
// implementation.
1516
}
1517
else
1518
#endif
1519
if (errno != ENOTTY && errno != EACCES) {
1520
if (raise)
1521
PyErr_SetFromErrno(PyExc_OSError);
1522
return -1;
1523
}
1524
else {
1525
/* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1526
device". The ioctl is declared but not supported by the kernel.
1527
Remember that ioctl() doesn't work. It is the case on
1528
Illumos-based OS for example.
1529
1530
Issue #27057: When SELinux policy disallows ioctl it will fail
1531
with EACCES. While FIOCLEX is safe operation it may be
1532
unavailable because ioctl was denied altogether.
1533
This can be the case on Android. */
1534
ioctl_works = 0;
1535
}
1536
/* fallback to fcntl() if ioctl() does not work */
1537
}
1538
#endif
1539
1540
/* slow-path: fcntl() requires two syscalls */
1541
flags = fcntl(fd, F_GETFD);
1542
if (flags < 0) {
1543
if (raise)
1544
PyErr_SetFromErrno(PyExc_OSError);
1545
return -1;
1546
}
1547
1548
if (inheritable) {
1549
new_flags = flags & ~FD_CLOEXEC;
1550
}
1551
else {
1552
new_flags = flags | FD_CLOEXEC;
1553
}
1554
1555
if (new_flags == flags) {
1556
/* FD_CLOEXEC flag already set/cleared: nothing to do */
1557
return 0;
1558
}
1559
1560
res = fcntl(fd, F_SETFD, new_flags);
1561
if (res < 0) {
1562
if (raise)
1563
PyErr_SetFromErrno(PyExc_OSError);
1564
return -1;
1565
}
1566
return 0;
1567
#endif
1568
}
1569
1570
/* Make the file descriptor non-inheritable without raising an exception.

   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, NULL);
}
1577
1578
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   atomic_flag_works tells whether the flag used to create the file
   descriptor already made it non-inheritable. If it is not NULL:

   * if *atomic_flag_works==-1, query the inheritable flag of the file
     descriptor: if the descriptor is already non-inheritable, set
     *atomic_flag_works to 1; otherwise set it to 0 and make the descriptor
     non-inheritable
   * if *atomic_flag_works==1: do nothing
   * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=1: failures are reported as Python exceptions. */
    const int raise = 1;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1599
1600
/* Variant of _Py_set_inheritable() that never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=0: report errors through errno only. */
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1608
1609
static int
1610
_Py_open_impl(const char *pathname, int flags, int gil_held)
1611
{
1612
int fd;
1613
int async_err = 0;
1614
#ifndef MS_WINDOWS
1615
int *atomic_flag_works;
1616
#endif
1617
1618
#ifdef MS_WINDOWS
1619
flags |= O_NOINHERIT;
1620
#elif defined(O_CLOEXEC)
1621
atomic_flag_works = &_Py_open_cloexec_works;
1622
flags |= O_CLOEXEC;
1623
#else
1624
atomic_flag_works = NULL;
1625
#endif
1626
1627
if (gil_held) {
1628
PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1629
if (pathname_obj == NULL) {
1630
return -1;
1631
}
1632
if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1633
Py_DECREF(pathname_obj);
1634
return -1;
1635
}
1636
1637
do {
1638
Py_BEGIN_ALLOW_THREADS
1639
fd = open(pathname, flags);
1640
Py_END_ALLOW_THREADS
1641
} while (fd < 0
1642
&& errno == EINTR && !(async_err = PyErr_CheckSignals()));
1643
if (async_err) {
1644
Py_DECREF(pathname_obj);
1645
return -1;
1646
}
1647
if (fd < 0) {
1648
PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1649
Py_DECREF(pathname_obj);
1650
return -1;
1651
}
1652
Py_DECREF(pathname_obj);
1653
}
1654
else {
1655
fd = open(pathname, flags);
1656
if (fd < 0)
1657
return -1;
1658
}
1659
1660
#ifndef MS_WINDOWS
1661
if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1662
close(fd);
1663
return -1;
1664
}
1665
#endif
1666
1667
return fd;
1668
}
1669
1670
/* Open a file with the specified flags (wrapper around open()).

   On success, return a file descriptor, which is created non-inheritable.
   On error, raise an exception and return -1.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released while open() runs. The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1687
1688
/* Open a file with the specified flags (wrapper around open()) without
   raising a Python exception.

   On success, return a file descriptor, which is created non-inheritable.
   On error, set errno and return -1.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1699
1700
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
1701
encoding and use fopen() otherwise.
1702
1703
The file descriptor is created non-inheritable.
1704
1705
If interrupted by a signal, fail with EINTR. */
1706
FILE *
1707
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1708
{
1709
FILE *f;
1710
if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1711
return NULL;
1712
}
1713
#ifndef MS_WINDOWS
1714
char *cpath;
1715
char cmode[10];
1716
size_t r;
1717
r = wcstombs(cmode, mode, 10);
1718
if (r == DECODE_ERROR || r >= 10) {
1719
errno = EINVAL;
1720
return NULL;
1721
}
1722
cpath = _Py_EncodeLocaleRaw(path, NULL);
1723
if (cpath == NULL) {
1724
return NULL;
1725
}
1726
f = fopen(cpath, cmode);
1727
PyMem_RawFree(cpath);
1728
#else
1729
f = _wfopen(path, mode);
1730
#endif
1731
if (f == NULL)
1732
return NULL;
1733
if (make_non_inheritable(fileno(f)) < 0) {
1734
fclose(f);
1735
return NULL;
1736
}
1737
return f;
1738
}
1739
1740
1741
/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1742
encoding and call fopen() otherwise.
1743
1744
Return the new file object on success. Raise an exception and return NULL
1745
on error.
1746
1747
The file descriptor is created non-inheritable.
1748
1749
When interrupted by a signal (open() fails with EINTR), retry the syscall,
1750
except if the Python signal handler raises an exception.
1751
1752
Release the GIL to call _wfopen() or fopen(). The caller must hold
1753
the GIL. */
1754
FILE*
1755
_Py_fopen_obj(PyObject *path, const char *mode)
1756
{
1757
FILE *f;
1758
int async_err = 0;
1759
#ifdef MS_WINDOWS
1760
wchar_t wmode[10];
1761
int usize;
1762
1763
assert(PyGILState_Check());
1764
1765
if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1766
return NULL;
1767
}
1768
if (!PyUnicode_Check(path)) {
1769
PyErr_Format(PyExc_TypeError,
1770
"str file path expected under Windows, got %R",
1771
Py_TYPE(path));
1772
return NULL;
1773
}
1774
1775
wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1776
if (wpath == NULL)
1777
return NULL;
1778
1779
usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1780
wmode, Py_ARRAY_LENGTH(wmode));
1781
if (usize == 0) {
1782
PyErr_SetFromWindowsErr(0);
1783
PyMem_Free(wpath);
1784
return NULL;
1785
}
1786
1787
do {
1788
Py_BEGIN_ALLOW_THREADS
1789
f = _wfopen(wpath, wmode);
1790
Py_END_ALLOW_THREADS
1791
} while (f == NULL
1792
&& errno == EINTR && !(async_err = PyErr_CheckSignals()));
1793
PyMem_Free(wpath);
1794
#else
1795
PyObject *bytes;
1796
const char *path_bytes;
1797
1798
assert(PyGILState_Check());
1799
1800
if (!PyUnicode_FSConverter(path, &bytes))
1801
return NULL;
1802
path_bytes = PyBytes_AS_STRING(bytes);
1803
1804
if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1805
Py_DECREF(bytes);
1806
return NULL;
1807
}
1808
1809
do {
1810
Py_BEGIN_ALLOW_THREADS
1811
f = fopen(path_bytes, mode);
1812
Py_END_ALLOW_THREADS
1813
} while (f == NULL
1814
&& errno == EINTR && !(async_err = PyErr_CheckSignals()));
1815
1816
Py_DECREF(bytes);
1817
#endif
1818
if (async_err)
1819
return NULL;
1820
1821
if (f == NULL) {
1822
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1823
return NULL;
1824
}
1825
1826
if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1827
fclose(f);
1828
return NULL;
1829
}
1830
return f;
1831
}
1832
1833
/* Read count bytes from fd into buf.
1834
1835
On success, return the number of read bytes, it can be lower than count.
1836
If the current file offset is at or past the end of file, no bytes are read,
1837
and read() returns zero.
1838
1839
On error, raise an exception, set errno and return -1.
1840
1841
When interrupted by a signal (read() fails with EINTR), retry the syscall.
1842
If the Python signal handler raises an exception, the function returns -1
1843
(the syscall is not retried).
1844
1845
Release the GIL to call read(). The caller must hold the GIL. */
1846
Py_ssize_t
1847
_Py_read(int fd, void *buf, size_t count)
1848
{
1849
Py_ssize_t n;
1850
int err;
1851
int async_err = 0;
1852
1853
assert(PyGILState_Check());
1854
1855
/* _Py_read() must not be called with an exception set, otherwise the
1856
* caller may think that read() was interrupted by a signal and the signal
1857
* handler raised an exception. */
1858
assert(!PyErr_Occurred());
1859
1860
if (count > _PY_READ_MAX) {
1861
count = _PY_READ_MAX;
1862
}
1863
1864
_Py_BEGIN_SUPPRESS_IPH
1865
do {
1866
Py_BEGIN_ALLOW_THREADS
1867
errno = 0;
1868
#ifdef MS_WINDOWS
1869
_doserrno = 0;
1870
n = read(fd, buf, (int)count);
1871
// read() on a non-blocking empty pipe fails with EINVAL, which is
1872
// mapped from the Windows error code ERROR_NO_DATA.
1873
if (n < 0 && errno == EINVAL) {
1874
if (_doserrno == ERROR_NO_DATA) {
1875
errno = EAGAIN;
1876
}
1877
}
1878
#else
1879
n = read(fd, buf, count);
1880
#endif
1881
/* save/restore errno because PyErr_CheckSignals()
1882
* and PyErr_SetFromErrno() can modify it */
1883
err = errno;
1884
Py_END_ALLOW_THREADS
1885
} while (n < 0 && err == EINTR &&
1886
!(async_err = PyErr_CheckSignals()));
1887
_Py_END_SUPPRESS_IPH
1888
1889
if (async_err) {
1890
/* read() was interrupted by a signal (failed with EINTR)
1891
* and the Python signal handler raised an exception */
1892
errno = err;
1893
assert(errno == EINTR && PyErr_Occurred());
1894
return -1;
1895
}
1896
if (n < 0) {
1897
PyErr_SetFromErrno(PyExc_OSError);
1898
errno = err;
1899
return -1;
1900
}
1901
1902
return n;
1903
}
1904
1905
static Py_ssize_t
1906
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1907
{
1908
Py_ssize_t n;
1909
int err;
1910
int async_err = 0;
1911
1912
_Py_BEGIN_SUPPRESS_IPH
1913
#ifdef MS_WINDOWS
1914
if (count > 32767) {
1915
/* Issue #11395: the Windows console returns an error (12: not
1916
enough space error) on writing into stdout if stdout mode is
1917
binary and the length is greater than 66,000 bytes (or less,
1918
depending on heap usage). */
1919
if (gil_held) {
1920
Py_BEGIN_ALLOW_THREADS
1921
if (isatty(fd)) {
1922
count = 32767;
1923
}
1924
Py_END_ALLOW_THREADS
1925
} else {
1926
if (isatty(fd)) {
1927
count = 32767;
1928
}
1929
}
1930
}
1931
1932
#endif
1933
if (count > _PY_WRITE_MAX) {
1934
count = _PY_WRITE_MAX;
1935
}
1936
1937
if (gil_held) {
1938
do {
1939
Py_BEGIN_ALLOW_THREADS
1940
errno = 0;
1941
#ifdef MS_WINDOWS
1942
// write() on a non-blocking pipe fails with ENOSPC on Windows if
1943
// the pipe lacks available space for the entire buffer.
1944
int c = (int)count;
1945
do {
1946
_doserrno = 0;
1947
n = write(fd, buf, c);
1948
if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1949
break;
1950
}
1951
errno = EAGAIN;
1952
c /= 2;
1953
} while (c > 0);
1954
#else
1955
n = write(fd, buf, count);
1956
#endif
1957
/* save/restore errno because PyErr_CheckSignals()
1958
* and PyErr_SetFromErrno() can modify it */
1959
err = errno;
1960
Py_END_ALLOW_THREADS
1961
} while (n < 0 && err == EINTR &&
1962
!(async_err = PyErr_CheckSignals()));
1963
}
1964
else {
1965
do {
1966
errno = 0;
1967
#ifdef MS_WINDOWS
1968
// write() on a non-blocking pipe fails with ENOSPC on Windows if
1969
// the pipe lacks available space for the entire buffer.
1970
int c = (int)count;
1971
do {
1972
_doserrno = 0;
1973
n = write(fd, buf, c);
1974
if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1975
break;
1976
}
1977
errno = EAGAIN;
1978
c /= 2;
1979
} while (c > 0);
1980
#else
1981
n = write(fd, buf, count);
1982
#endif
1983
err = errno;
1984
} while (n < 0 && err == EINTR);
1985
}
1986
_Py_END_SUPPRESS_IPH
1987
1988
if (async_err) {
1989
/* write() was interrupted by a signal (failed with EINTR)
1990
and the Python signal handler raised an exception (if gil_held is
1991
nonzero). */
1992
errno = err;
1993
assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1994
return -1;
1995
}
1996
if (n < 0) {
1997
if (gil_held)
1998
PyErr_SetFromErrno(PyExc_OSError);
1999
errno = err;
2000
return -1;
2001
}
2002
2003
return n;
2004
}
2005
2006
/* Write count bytes of buf into fd.
2007
2008
On success, return the number of written bytes, it can be lower than count
2009
including 0. On error, raise an exception, set errno and return -1.
2010
2011
When interrupted by a signal (write() fails with EINTR), retry the syscall.
2012
If the Python signal handler raises an exception, the function returns -1
2013
(the syscall is not retried).
2014
2015
Release the GIL to call write(). The caller must hold the GIL. */
2016
Py_ssize_t
2017
_Py_write(int fd, const void *buf, size_t count)
2018
{
2019
assert(PyGILState_Check());
2020
2021
/* _Py_write() must not be called with an exception set, otherwise the
2022
* caller may think that write() was interrupted by a signal and the signal
2023
* handler raised an exception. */
2024
assert(!PyErr_Occurred());
2025
2026
return _Py_write_impl(fd, buf, count, 1);
2027
}
2028
2029
/* Write count bytes of buf into fd.
2030
*
2031
* On success, return the number of written bytes, it can be lower than count
2032
* including 0. On error, set errno and return -1.
2033
*
2034
* When interrupted by a signal (write() fails with EINTR), retry the syscall
2035
* without calling the Python signal handler. */
2036
Py_ssize_t
2037
_Py_write_noraise(int fd, const void *buf, size_t count)
2038
{
2039
return _Py_write_impl(fd, buf, count, 0);
2040
}
2041
2042
#ifdef HAVE_READLINK
2043
2044
/* Read value of symbolic link. Encode the path to the locale encoding, decode
2045
the result from the locale encoding.
2046
2047
Return -1 on encoding error, on readlink() error, if the internal buffer is
2048
too short, on decoding error, or if 'buf' is too short. */
2049
int
2050
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
2051
{
2052
char *cpath;
2053
char cbuf[MAXPATHLEN];
2054
size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
2055
wchar_t *wbuf;
2056
Py_ssize_t res;
2057
size_t r1;
2058
2059
cpath = _Py_EncodeLocaleRaw(path, NULL);
2060
if (cpath == NULL) {
2061
errno = EINVAL;
2062
return -1;
2063
}
2064
res = readlink(cpath, cbuf, cbuf_len);
2065
PyMem_RawFree(cpath);
2066
if (res == -1) {
2067
return -1;
2068
}
2069
if ((size_t)res == cbuf_len) {
2070
errno = EINVAL;
2071
return -1;
2072
}
2073
cbuf[res] = '\0'; /* buf will be null terminated */
2074
wbuf = Py_DecodeLocale(cbuf, &r1);
2075
if (wbuf == NULL) {
2076
errno = EINVAL;
2077
return -1;
2078
}
2079
/* wbuf must have space to store the trailing NUL character */
2080
if (buflen <= r1) {
2081
PyMem_RawFree(wbuf);
2082
errno = EINVAL;
2083
return -1;
2084
}
2085
wcsncpy(buf, wbuf, buflen);
2086
PyMem_RawFree(wbuf);
2087
return (int)r1;
2088
}
2089
#endif
2090
2091
#ifdef HAVE_REALPATH
2092
2093
/* Return the canonicalized absolute pathname. Encode path to the locale
2094
encoding, decode the result from the locale encoding.
2095
2096
Return NULL on encoding error, realpath() error, decoding error
2097
or if 'resolved_path' is too short. */
2098
wchar_t*
2099
_Py_wrealpath(const wchar_t *path,
2100
wchar_t *resolved_path, size_t resolved_path_len)
2101
{
2102
char *cpath;
2103
char cresolved_path[MAXPATHLEN];
2104
wchar_t *wresolved_path;
2105
char *res;
2106
size_t r;
2107
cpath = _Py_EncodeLocaleRaw(path, NULL);
2108
if (cpath == NULL) {
2109
errno = EINVAL;
2110
return NULL;
2111
}
2112
res = realpath(cpath, cresolved_path);
2113
PyMem_RawFree(cpath);
2114
if (res == NULL)
2115
return NULL;
2116
2117
wresolved_path = Py_DecodeLocale(cresolved_path, &r);
2118
if (wresolved_path == NULL) {
2119
errno = EINVAL;
2120
return NULL;
2121
}
2122
/* wresolved_path must have space to store the trailing NUL character */
2123
if (resolved_path_len <= r) {
2124
PyMem_RawFree(wresolved_path);
2125
errno = EINVAL;
2126
return NULL;
2127
}
2128
wcsncpy(resolved_path, wresolved_path, resolved_path_len);
2129
PyMem_RawFree(wresolved_path);
2130
return resolved_path;
2131
}
2132
#endif
2133
2134
2135
int
2136
_Py_isabs(const wchar_t *path)
2137
{
2138
#ifdef MS_WINDOWS
2139
const wchar_t *tail;
2140
HRESULT hr = PathCchSkipRoot(path, &tail);
2141
if (FAILED(hr) || path == tail) {
2142
return 0;
2143
}
2144
if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2145
// Exclude paths with leading SEP
2146
return 0;
2147
}
2148
if (tail == &path[2] && path[1] == L':') {
2149
// Exclude drive-relative paths (e.g. C:filename.ext)
2150
return 0;
2151
}
2152
return 1;
2153
#else
2154
return (path[0] == SEP);
2155
#endif
2156
}
2157
2158
2159
/* Get an absolute path.
2160
On error (ex: fail to get the current directory), return -1.
2161
On memory allocation failure, set *abspath_p to NULL and return 0.
2162
On success, return a newly allocated to *abspath_p to and return 0.
2163
The string must be freed by PyMem_RawFree(). */
2164
int
2165
_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2166
{
2167
if (path[0] == '\0' || !wcscmp(path, L".")) {
2168
wchar_t cwd[MAXPATHLEN + 1];
2169
cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2170
if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2171
/* unable to get the current directory */
2172
return -1;
2173
}
2174
*abspath_p = _PyMem_RawWcsdup(cwd);
2175
return 0;
2176
}
2177
2178
if (_Py_isabs(path)) {
2179
*abspath_p = _PyMem_RawWcsdup(path);
2180
return 0;
2181
}
2182
2183
#ifdef MS_WINDOWS
2184
return _PyOS_getfullpathname(path, abspath_p);
2185
#else
2186
wchar_t cwd[MAXPATHLEN + 1];
2187
cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2188
if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2189
/* unable to get the current directory */
2190
return -1;
2191
}
2192
2193
size_t cwd_len = wcslen(cwd);
2194
size_t path_len = wcslen(path);
2195
size_t len = cwd_len + 1 + path_len + 1;
2196
if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2197
*abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2198
}
2199
else {
2200
*abspath_p = NULL;
2201
}
2202
if (*abspath_p == NULL) {
2203
return 0;
2204
}
2205
2206
wchar_t *abspath = *abspath_p;
2207
memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2208
abspath += cwd_len;
2209
2210
*abspath = (wchar_t)SEP;
2211
abspath++;
2212
2213
memcpy(abspath, path, path_len * sizeof(wchar_t));
2214
abspath += path_len;
2215
2216
*abspath = 0;
2217
return 0;
2218
#endif
2219
}
2220
2221
// The Windows Games API family implements the PathCch* APIs in the Xbox OS,
2222
// but does not expose them yet. Load them dynamically until
2223
// 1) they are officially exposed
2224
// 2) we stop supporting older versions of the GDK which do not expose them
2225
#if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
2226
HRESULT
2227
PathCchSkipRoot(const wchar_t *path, const wchar_t **rootEnd)
2228
{
2229
static int initialized = 0;
2230
typedef HRESULT(__stdcall *PPathCchSkipRoot) (PCWSTR pszPath,
2231
PCWSTR *ppszRootEnd);
2232
static PPathCchSkipRoot _PathCchSkipRoot;
2233
2234
if (initialized == 0) {
2235
HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2236
LOAD_LIBRARY_SEARCH_SYSTEM32);
2237
if (pathapi) {
2238
_PathCchSkipRoot = (PPathCchSkipRoot)GetProcAddress(
2239
pathapi, "PathCchSkipRoot");
2240
}
2241
else {
2242
_PathCchSkipRoot = NULL;
2243
}
2244
initialized = 1;
2245
}
2246
2247
if (!_PathCchSkipRoot) {
2248
return E_NOINTERFACE;
2249
}
2250
2251
return _PathCchSkipRoot(path, rootEnd);
2252
}
2253
2254
static HRESULT
2255
PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
2256
const wchar_t *relfile, unsigned long flags)
2257
{
2258
static int initialized = 0;
2259
typedef HRESULT(__stdcall *PPathCchCombineEx) (PWSTR pszPathOut,
2260
size_t cchPathOut,
2261
PCWSTR pszPathIn,
2262
PCWSTR pszMore,
2263
unsigned long dwFlags);
2264
static PPathCchCombineEx _PathCchCombineEx;
2265
2266
if (initialized == 0) {
2267
HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2268
LOAD_LIBRARY_SEARCH_SYSTEM32);
2269
if (pathapi) {
2270
_PathCchCombineEx = (PPathCchCombineEx)GetProcAddress(
2271
pathapi, "PathCchCombineEx");
2272
}
2273
else {
2274
_PathCchCombineEx = NULL;
2275
}
2276
initialized = 1;
2277
}
2278
2279
if (!_PathCchCombineEx) {
2280
return E_NOINTERFACE;
2281
}
2282
2283
return _PathCchCombineEx(buffer, bufsize, dirname, relfile, flags);
2284
}
2285
2286
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
2287
2288
// The caller must ensure "buffer" is big enough.
2289
static int
2290
join_relfile(wchar_t *buffer, size_t bufsize,
2291
const wchar_t *dirname, const wchar_t *relfile)
2292
{
2293
#ifdef MS_WINDOWS
2294
if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2295
PATHCCH_ALLOW_LONG_PATHS))) {
2296
return -1;
2297
}
2298
#else
2299
assert(!_Py_isabs(relfile));
2300
size_t dirlen = wcslen(dirname);
2301
size_t rellen = wcslen(relfile);
2302
size_t maxlen = bufsize - 1;
2303
if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2304
return -1;
2305
}
2306
if (dirlen == 0) {
2307
// We do not add a leading separator.
2308
wcscpy(buffer, relfile);
2309
}
2310
else {
2311
if (dirname != buffer) {
2312
wcscpy(buffer, dirname);
2313
}
2314
size_t relstart = dirlen;
2315
if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2316
buffer[dirlen] = SEP;
2317
relstart += 1;
2318
}
2319
wcscpy(&buffer[relstart], relfile);
2320
}
2321
#endif
2322
return 0;
2323
}
2324
2325
/* Join the two paths together, like os.path.join(). Return NULL
2326
if memory could not be allocated. The caller is responsible
2327
for calling PyMem_RawFree() on the result. */
2328
wchar_t *
2329
_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2330
{
2331
assert(dirname != NULL && relfile != NULL);
2332
#ifndef MS_WINDOWS
2333
assert(!_Py_isabs(relfile));
2334
#endif
2335
size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2336
size_t bufsize = maxlen + 1;
2337
wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2338
if (filename == NULL) {
2339
return NULL;
2340
}
2341
assert(wcslen(dirname) < MAXPATHLEN);
2342
assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2343
if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
2344
PyMem_RawFree(filename);
2345
return NULL;
2346
}
2347
return filename;
2348
}
2349
2350
/* Append "relfile" to the path already stored in "dirname", in place,
   like os.path.join().

   dirname: the target buffer, already holding the NUL-terminated directory
   relfile: the path to append; this must be a relative path
   bufsize: total allocated size of the "dirname" buffer

   Return 0 on success, -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    // Source directory and destination are the same buffer.
    wchar_t *target = dirname;
    int status = join_relfile(target, bufsize, dirname, relfile);
    return status;
}
2363
2364
2365
size_t
2366
_Py_find_basename(const wchar_t *filename)
2367
{
2368
for (size_t i = wcslen(filename); i > 0; --i) {
2369
if (filename[i] == SEP) {
2370
return i + 1;
2371
}
2372
}
2373
return 0;
2374
}
2375
2376
/* In-place path normalisation. Returns the start of the normalized
2377
path, which will be within the original buffer. Guaranteed to not
2378
make the path longer, and will not fail. 'size' is the length of
2379
the path, if known. If -1, the first null character will be assumed
2380
to be the end of the path. */
2381
wchar_t *
2382
_Py_normpath(wchar_t *path, Py_ssize_t size)
2383
{
2384
assert(path != NULL);
2385
if (!path[0] || size == 0) {
2386
return path;
2387
}
2388
wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2389
wchar_t *p1 = path; // sequentially scanned address in the path
2390
wchar_t *p2 = path; // destination of a scanned character to be ljusted
2391
wchar_t *minP2 = path; // the beginning of the destination range
2392
wchar_t lastC = L'\0'; // the last ljusted character, p2[-1] in most cases
2393
2394
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2395
#ifdef ALTSEP
2396
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2397
#else
2398
#define IS_SEP(x) (*(x) == SEP)
2399
#endif
2400
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2401
2402
// Skip leading '.\'
2403
if (p1[0] == L'.' && IS_SEP(&p1[1])) {
2404
path = &path[2];
2405
while (IS_SEP(path) && !IS_END(path)) {
2406
path++;
2407
}
2408
p1 = p2 = minP2 = path;
2409
lastC = SEP;
2410
}
2411
#ifdef MS_WINDOWS
2412
// Skip past drive segment and update minP2
2413
else if (p1[0] && p1[1] == L':') {
2414
*p2++ = *p1++;
2415
*p2++ = *p1++;
2416
minP2 = p2;
2417
lastC = L':';
2418
}
2419
// Skip past all \\-prefixed paths, including \\?\, \\.\,
2420
// and network paths, including the first segment.
2421
else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
2422
int sepCount = 2;
2423
*p2++ = SEP;
2424
*p2++ = SEP;
2425
p1 += 2;
2426
for (; !IS_END(p1) && sepCount; ++p1) {
2427
if (IS_SEP(p1)) {
2428
--sepCount;
2429
*p2++ = lastC = SEP;
2430
} else {
2431
*p2++ = lastC = *p1;
2432
}
2433
}
2434
if (sepCount) {
2435
minP2 = p2; // Invalid path
2436
} else {
2437
minP2 = p2 - 1; // Absolute path has SEP at minP2
2438
}
2439
}
2440
#else
2441
// Skip past two leading SEPs
2442
else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
2443
*p2++ = *p1++;
2444
*p2++ = *p1++;
2445
minP2 = p2 - 1; // Absolute path has SEP at minP2
2446
lastC = SEP;
2447
}
2448
#endif /* MS_WINDOWS */
2449
2450
/* if pEnd is specified, check that. Else, check for null terminator */
2451
for (; !IS_END(p1); ++p1) {
2452
wchar_t c = *p1;
2453
#ifdef ALTSEP
2454
if (c == ALTSEP) {
2455
c = SEP;
2456
}
2457
#endif
2458
if (lastC == SEP) {
2459
if (c == L'.') {
2460
int sep_at_1 = SEP_OR_END(&p1[1]);
2461
int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
2462
if (sep_at_2 && p1[1] == L'.') {
2463
wchar_t *p3 = p2;
2464
while (p3 != minP2 && *--p3 == SEP) { }
2465
while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
2466
if (p2 == minP2
2467
|| (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
2468
{
2469
// Previous segment is also ../, so append instead.
2470
// Relative path does not absorb ../ at minP2 as well.
2471
*p2++ = L'.';
2472
*p2++ = L'.';
2473
lastC = L'.';
2474
} else if (p3[0] == SEP) {
2475
// Absolute path, so absorb segment
2476
p2 = p3 + 1;
2477
} else {
2478
p2 = p3;
2479
}
2480
p1 += 1;
2481
} else if (sep_at_1) {
2482
} else {
2483
*p2++ = lastC = c;
2484
}
2485
} else if (c == SEP) {
2486
} else {
2487
*p2++ = lastC = c;
2488
}
2489
} else {
2490
*p2++ = lastC = c;
2491
}
2492
}
2493
*p2 = L'\0';
2494
if (p2 != minP2) {
2495
while (--p2 != minP2 && *p2 == SEP) {
2496
*p2 = L'\0';
2497
}
2498
}
2499
#undef SEP_OR_END
2500
#undef IS_SEP
2501
#undef IS_END
2502
return path;
2503
}
2504
2505
2506
/* Get the current directory. buflen is the buffer size in wide characters
2507
including the null character. Decode the path from the locale encoding.
2508
2509
Return NULL on getcwd() error, on decoding error, or if 'buf' is
2510
too short. */
2511
wchar_t*
2512
_Py_wgetcwd(wchar_t *buf, size_t buflen)
2513
{
2514
#ifdef MS_WINDOWS
2515
int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2516
return _wgetcwd(buf, ibuflen);
2517
#else
2518
char fname[MAXPATHLEN];
2519
wchar_t *wname;
2520
size_t len;
2521
2522
if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2523
return NULL;
2524
wname = Py_DecodeLocale(fname, &len);
2525
if (wname == NULL)
2526
return NULL;
2527
/* wname must have space to store the trailing NUL character */
2528
if (buflen <= len) {
2529
PyMem_RawFree(wname);
2530
return NULL;
2531
}
2532
wcsncpy(buf, wname, buflen);
2533
PyMem_RawFree(wname);
2534
return buf;
2535
#endif
2536
}
2537
2538
/* Duplicate a file descriptor. The new file descriptor is created as
2539
non-inheritable. Return a new file descriptor on success, raise an OSError
2540
exception and return -1 on error.
2541
2542
The GIL is released to call dup(). The caller must hold the GIL. */
2543
int
2544
_Py_dup(int fd)
2545
{
2546
#ifdef MS_WINDOWS
2547
HANDLE handle;
2548
#endif
2549
2550
assert(PyGILState_Check());
2551
2552
#ifdef MS_WINDOWS
2553
handle = _Py_get_osfhandle(fd);
2554
if (handle == INVALID_HANDLE_VALUE)
2555
return -1;
2556
2557
Py_BEGIN_ALLOW_THREADS
2558
_Py_BEGIN_SUPPRESS_IPH
2559
fd = dup(fd);
2560
_Py_END_SUPPRESS_IPH
2561
Py_END_ALLOW_THREADS
2562
if (fd < 0) {
2563
PyErr_SetFromErrno(PyExc_OSError);
2564
return -1;
2565
}
2566
2567
if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2568
_Py_BEGIN_SUPPRESS_IPH
2569
close(fd);
2570
_Py_END_SUPPRESS_IPH
2571
return -1;
2572
}
2573
#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2574
Py_BEGIN_ALLOW_THREADS
2575
_Py_BEGIN_SUPPRESS_IPH
2576
fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2577
_Py_END_SUPPRESS_IPH
2578
Py_END_ALLOW_THREADS
2579
if (fd < 0) {
2580
PyErr_SetFromErrno(PyExc_OSError);
2581
return -1;
2582
}
2583
2584
#elif HAVE_DUP
2585
Py_BEGIN_ALLOW_THREADS
2586
_Py_BEGIN_SUPPRESS_IPH
2587
fd = dup(fd);
2588
_Py_END_SUPPRESS_IPH
2589
Py_END_ALLOW_THREADS
2590
if (fd < 0) {
2591
PyErr_SetFromErrno(PyExc_OSError);
2592
return -1;
2593
}
2594
2595
if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2596
_Py_BEGIN_SUPPRESS_IPH
2597
close(fd);
2598
_Py_END_SUPPRESS_IPH
2599
return -1;
2600
}
2601
#else
2602
errno = ENOTSUP;
2603
PyErr_SetFromErrno(PyExc_OSError);
2604
return -1;
2605
#endif
2606
return fd;
2607
}
2608
2609
#ifndef MS_WINDOWS
2610
/* Get the blocking mode of the file descriptor.
2611
Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2612
raise an exception and return -1 on error. */
2613
int
2614
_Py_get_blocking(int fd)
2615
{
2616
int flags;
2617
_Py_BEGIN_SUPPRESS_IPH
2618
flags = fcntl(fd, F_GETFL, 0);
2619
_Py_END_SUPPRESS_IPH
2620
if (flags < 0) {
2621
PyErr_SetFromErrno(PyExc_OSError);
2622
return -1;
2623
}
2624
2625
return !(flags & O_NONBLOCK);
2626
}
2627
2628
/* Set the blocking mode of the specified file descriptor.
2629
2630
Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2631
otherwise.
2632
2633
Return 0 on success, raise an exception and return -1 on error. */
2634
int
2635
_Py_set_blocking(int fd, int blocking)
2636
{
2637
/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2638
Use fcntl() instead. */
2639
#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2640
int arg = !blocking;
2641
if (ioctl(fd, FIONBIO, &arg) < 0)
2642
goto error;
2643
#else
2644
int flags, res;
2645
2646
_Py_BEGIN_SUPPRESS_IPH
2647
flags = fcntl(fd, F_GETFL, 0);
2648
if (flags >= 0) {
2649
if (blocking)
2650
flags = flags & (~O_NONBLOCK);
2651
else
2652
flags = flags | O_NONBLOCK;
2653
2654
res = fcntl(fd, F_SETFL, flags);
2655
} else {
2656
res = -1;
2657
}
2658
_Py_END_SUPPRESS_IPH
2659
2660
if (res < 0)
2661
goto error;
2662
#endif
2663
return 0;
2664
2665
error:
2666
PyErr_SetFromErrno(PyExc_OSError);
2667
return -1;
2668
}
2669
#else /* MS_WINDOWS */
2670
int
2671
_Py_get_blocking(int fd)
2672
{
2673
HANDLE handle;
2674
DWORD mode;
2675
BOOL success;
2676
2677
handle = _Py_get_osfhandle(fd);
2678
if (handle == INVALID_HANDLE_VALUE) {
2679
return -1;
2680
}
2681
2682
Py_BEGIN_ALLOW_THREADS
2683
success = GetNamedPipeHandleStateW(handle, &mode,
2684
NULL, NULL, NULL, NULL, 0);
2685
Py_END_ALLOW_THREADS
2686
2687
if (!success) {
2688
PyErr_SetFromWindowsErr(0);
2689
return -1;
2690
}
2691
2692
return !(mode & PIPE_NOWAIT);
2693
}
2694
2695
int
2696
_Py_set_blocking(int fd, int blocking)
2697
{
2698
HANDLE handle;
2699
DWORD mode;
2700
BOOL success;
2701
2702
handle = _Py_get_osfhandle(fd);
2703
if (handle == INVALID_HANDLE_VALUE) {
2704
return -1;
2705
}
2706
2707
Py_BEGIN_ALLOW_THREADS
2708
success = GetNamedPipeHandleStateW(handle, &mode,
2709
NULL, NULL, NULL, NULL, 0);
2710
if (success) {
2711
if (blocking) {
2712
mode &= ~PIPE_NOWAIT;
2713
}
2714
else {
2715
mode |= PIPE_NOWAIT;
2716
}
2717
success = SetNamedPipeHandleState(handle, &mode, NULL, NULL);
2718
}
2719
Py_END_ALLOW_THREADS
2720
2721
if (!success) {
2722
PyErr_SetFromWindowsErr(0);
2723
return -1;
2724
}
2725
return 0;
2726
}
2727
2728
void*
2729
_Py_get_osfhandle_noraise(int fd)
2730
{
2731
void *handle;
2732
_Py_BEGIN_SUPPRESS_IPH
2733
handle = (void*)_get_osfhandle(fd);
2734
_Py_END_SUPPRESS_IPH
2735
return handle;
2736
}
2737
2738
void*
2739
_Py_get_osfhandle(int fd)
2740
{
2741
void *handle = _Py_get_osfhandle_noraise(fd);
2742
if (handle == INVALID_HANDLE_VALUE)
2743
PyErr_SetFromErrno(PyExc_OSError);
2744
2745
return handle;
2746
}
2747
2748
int
2749
_Py_open_osfhandle_noraise(void *handle, int flags)
2750
{
2751
int fd;
2752
_Py_BEGIN_SUPPRESS_IPH
2753
fd = _open_osfhandle((intptr_t)handle, flags);
2754
_Py_END_SUPPRESS_IPH
2755
return fd;
2756
}
2757
2758
int
2759
_Py_open_osfhandle(void *handle, int flags)
2760
{
2761
int fd = _Py_open_osfhandle_noraise(handle, flags);
2762
if (fd == -1)
2763
PyErr_SetFromErrno(PyExc_OSError);
2764
2765
return fd;
2766
}
2767
#endif /* MS_WINDOWS */
2768
2769
int
2770
_Py_GetLocaleconvNumeric(struct lconv *lc,
2771
PyObject **decimal_point, PyObject **thousands_sep)
2772
{
2773
assert(decimal_point != NULL);
2774
assert(thousands_sep != NULL);
2775
2776
#ifndef MS_WINDOWS
2777
int change_locale = 0;
2778
if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2779
change_locale = 1;
2780
}
2781
if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2782
change_locale = 1;
2783
}
2784
2785
/* Keep a copy of the LC_CTYPE locale */
2786
char *oldloc = NULL, *loc = NULL;
2787
if (change_locale) {
2788
oldloc = setlocale(LC_CTYPE, NULL);
2789
if (!oldloc) {
2790
PyErr_SetString(PyExc_RuntimeWarning,
2791
"failed to get LC_CTYPE locale");
2792
return -1;
2793
}
2794
2795
oldloc = _PyMem_Strdup(oldloc);
2796
if (!oldloc) {
2797
PyErr_NoMemory();
2798
return -1;
2799
}
2800
2801
loc = setlocale(LC_NUMERIC, NULL);
2802
if (loc != NULL && strcmp(loc, oldloc) == 0) {
2803
loc = NULL;
2804
}
2805
2806
if (loc != NULL) {
2807
/* Only set the locale temporarily the LC_CTYPE locale
2808
if LC_NUMERIC locale is different than LC_CTYPE locale and
2809
decimal_point and/or thousands_sep are non-ASCII or longer than
2810
1 byte */
2811
setlocale(LC_CTYPE, loc);
2812
}
2813
}
2814
2815
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2816
#else /* MS_WINDOWS */
2817
/* Use _W_* fields of Windows strcut lconv */
2818
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2819
#endif /* MS_WINDOWS */
2820
2821
int res = -1;
2822
2823
*decimal_point = GET_LOCALE_STRING(decimal_point);
2824
if (*decimal_point == NULL) {
2825
goto done;
2826
}
2827
2828
*thousands_sep = GET_LOCALE_STRING(thousands_sep);
2829
if (*thousands_sep == NULL) {
2830
goto done;
2831
}
2832
2833
res = 0;
2834
2835
done:
2836
#ifndef MS_WINDOWS
2837
if (loc != NULL) {
2838
setlocale(LC_CTYPE, oldloc);
2839
}
2840
PyMem_Free(oldloc);
2841
#endif
2842
return res;
2843
2844
#undef GET_LOCALE_STRING
2845
}
2846
2847
/* Our selection logic for which function to use is as follows:
2848
* 1. If close_range(2) is available, always prefer that; it's better for
2849
* contiguous ranges like this than fdwalk(3) which entails iterating over
2850
* the entire fd space and simply doing nothing for those outside the range.
2851
* 2. If closefrom(2) is available, we'll attempt to use that next if we're
2852
* closing up to sysconf(_SC_OPEN_MAX).
2853
* 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2854
* as that will be more performant if the range happens to have any chunk of
2855
* non-opened fd in the middle.
2856
* 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2857
*/
2858
#ifdef __FreeBSD__
2859
# define USE_CLOSEFROM
2860
#endif /* __FreeBSD__ */
2861
2862
#ifdef HAVE_FDWALK
2863
# define USE_FDWALK
2864
#endif /* HAVE_FDWALK */
2865
2866
#ifdef USE_FDWALK
2867
/* Callback handed to fdwalk() by _Py_closerange().

   "lohi" points to two ints: the lowest descriptor to close and one past
   the highest.  Descriptors in [lo, hi) are closed, ignoring errors, and
   descriptors below lo are left alone; both cases return 0.  Return 1 as
   soon as "fd" reaches hi (presumably this ends the walk; see fdwalk(3C)). */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (int *)lohi;
    const int lo = bounds[0];
    const int hi = bounds[1];

    if (fd >= hi) {
        return 1;
    }
    if (fd >= lo) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2882
#endif /* USE_FDWALK */
2883
2884
/* Closes all file descriptors in [first, last], ignoring errors. */
2885
void
2886
_Py_closerange(int first, int last)
2887
{
2888
first = Py_MAX(first, 0);
2889
_Py_BEGIN_SUPPRESS_IPH
2890
#ifdef HAVE_CLOSE_RANGE
2891
if (close_range(first, last, 0) == 0) {
2892
/* close_range() ignores errors when it closes file descriptors.
2893
* Possible reasons of an error return are lack of kernel support
2894
* or denial of the underlying syscall by a seccomp sandbox on Linux.
2895
* Fallback to other methods in case of any error. */
2896
}
2897
else
2898
#endif /* HAVE_CLOSE_RANGE */
2899
#ifdef USE_CLOSEFROM
2900
if (last >= sysconf(_SC_OPEN_MAX)) {
2901
/* Any errors encountered while closing file descriptors are ignored */
2902
closefrom(first);
2903
}
2904
else
2905
#endif /* USE_CLOSEFROM */
2906
#ifdef USE_FDWALK
2907
{
2908
int lohi[2];
2909
lohi[0] = first;
2910
lohi[1] = last + 1;
2911
fdwalk(_fdwalk_close_func, lohi);
2912
}
2913
#else
2914
{
2915
for (int i = first; i <= last; i++) {
2916
/* Ignore errors */
2917
(void)close(i);
2918
}
2919
}
2920
#endif /* USE_FDWALK */
2921
_Py_END_SUPPRESS_IPH
2922
}
2923
2924